#----------------------------------------------------------------------
# Name:         nsfg_networkanalysis_batch_step2.py
# Purpose:      This is the main program for the analysis.
#               Routes are created only when two consecutive GPX points exist
#               within the selected blocks and the blocks adjacent to them.
#               Route information (e.g. time, speed) is pulled from the starting GPX point.
#               Each sampled house is joined to a route if
#                   a. the distance between the sampled house and the road is within 1000 meters,
#                   b. the sampled house address and the road name match, and
#                   c. the route passes through the point on the road closest to the sampled house.
#               Routes are joined to each sampled house per interviewer and per interview date.
#               Therefore, the output will contain duplicate sampled-house records
#               with different call notes or different routes.
#
# Created:      2013-03-28
#----------------------------------------------------------------------

import arcpy
import datetime
import csv
import xlrd
import os
import shutil

### The lists of selected PSUs and available GPX files are sensitive data, so they have been removed from this version. ###
# Three-character file-name prefixes accepted by read_available_gpx; left empty here.
available_GPX = []

def read_available_gpx(dir):
    """Return the entries of *dir* whose prefix is listed in available_GPX.

    An entry is kept when its first three characters appear in the
    module-level ``available_GPX`` list.  Each kept name is returned with its
    last four characters removed (presumably the ".xls" suffix).  Entries
    come back in the order ``os.listdir`` yields them.
    """
    return [name[0:len(name) - 4]
            for name in os.listdir(dir)
            if name[0:3] in available_GPX]
    
#############################################
#####   Set up Input files and folders  #####
#############################################
dir_addrfeat = r"<Path>\addrfeat\redo" # Folder for Network files based on Census Tiger addrfeat shapefiles
dir_routes = r"<Path>\routes\q1" # Folder for GPX files snapped to the nearest road
spjoin = r'<Path>\routes\join_id_info\spjoin2csv.csv'
dir_callnote = r"<Path>\GPS Files\Calls\Q1" # Folder for call notes
coordinate_file = "<Path>\North America Albers Equal Area Conic.prj" # Project file location.
dir_original_gpx = r"<Path>\gpx\q1\xls" # Folder for original GPX excel file. It will be used to read PSU.
allhouses = r"<Path>\gpx\q1\geocoded_all_sampled.shp" # respondent location

folderList = read_available_gpx(dir_original_gpx)
print folderList

##### read csv and create list per line #####
# d maps the 1-based line number of the spjoin CSV to the first two columns
# of that line: 'combineID' (column 0) and 'Geo10' (column 1).
d = {}
# The with-block closes the file as soon as it has been read.
with open(spjoin, 'rb') as infile:
    csvreader = csv.reader(infile, delimiter=',', quotechar='"')
    for p, row in enumerate(csvreader, 1):
        d[p] = {'combineID': row[0], 'Geo10': row[1]}

for i in range(0,len(folderList)):
       
    IwFolder = folderList[i]
    print "current folder:" + IwFolder
    print str(datetime.datetime.now())
    IwerID = IwFolder[0:3]
    IwDate = IwFolder[4:13]
    tempTime = datetime.datetime.strptime(IwDate, '%Y%m%d')
    CallDate = str(tempTime.strftime("%d%b%Y")).upper()

    if not os.path.exists(dir_routes + "\\" + IwFolder):
        os.makedirs(dir_routes + "\\" + IwFolder)

    callnote = dir_callnote + "\\nsfgID_" + IwerID + "_" + CallDate + ".xls"
    #### select sample housing units based on the segments visited in callrecords (callnote) ####################
    dCall = {}
    c = 0
    wb = xlrd.open_workbook(callnote)
    sh = wb.sheet_by_name(u'sheet1')
    for rownum in range(1,sh.nrows):
        dCall[c] = {'vProjectId': sh.row_values(rownum,0,1)[0].encode('utf8'),
                    'vSamplelineId':sh.row_values(rownum,1,2)[0].encode('utf8'),
                    'nCallInfoId':sh.row_values(rownum,2,3)[0],
                    'sCallMode':sh.row_values(rownum,3,4)[0].encode('utf8'),
                    'sResultCodeId':sh.row_values(rownum,4,5)[0].encode('utf8'),
                    'dResultDate':sh.row_values(rownum,5,6)[0],
                    'tResultTime':sh.row_values(rownum,6,7)[0],
                    'sInterviewerEmployeeId':sh.row_values(rownum,7,8)[0].encode('utf8'),
                    'nsfgID':sh.row_values(rownum,8,9)[0].encode('utf8'),
                    'vAddressLine1':sh.row_values(rownum,9,10)[0].encode('utf8'),
                    'vAddressLine2':sh.row_values(rownum,10,11)[0].encode('utf8'),
                    'vCity':sh.row_values(rownum,11,12)[0].encode('utf8'),
                    'sState':sh.row_values(rownum,12,13)[0].encode('utf8'),
                    'sPostalCode':sh.row_values(rownum,13,14)[0].encode('utf8'),
                    'Rdate':'',
                    'Rtime':'',
                    'IwerID':IwerID,
                    'IwDate':IwDate,
                    'Section':sh.row_values(rownum,1,2)[0].encode('utf8')[0:7]}
        c = c + 1

    # SectionList will be used to join with selected blocks
    SectionList = []
    for key in dCall.keys():
        if dCall[key]['Section'] not in SectionList:
            SectionList.append(dCall[key]['Section'])

    # find state, county and blocks for each section
    d_Sec = {}
    for s in range(0,len(SectionList)):
        Section  = SectionList[s]
        for key in d.keys():
            if d[key]['combineID'][0:7] == Section and d[key]['Geo10'] != '':
                if Section not in d_Sec:
                    d_Sec[Section] = {'state':'', 'county':'', 'block':[]}
                d_Sec[Section]['state'] = d[key]['Geo10'][0:2]
                d_Sec[Section]['county'] = d[key]['Geo10'][2:5]
                if d[key]['Geo10'][11:15] not in d_Sec[Section]['block']:
                    d_Sec[Section]['block'].append(d[key]['Geo10'][11:15])

    # iterate each section
    for s in range(0,len(SectionList)):
        road_network = dir_addrfeat + "\\addrfeat_selected_" + d_Sec[SectionList[s]]['state'] + d_Sec[SectionList[s]]['county'] + "_identify_ND.nd"
        gpx_selected = dir_routes + "\\" + IwFolder + "\\gpx_selected_" + IwFolder + "_s" + str(s) + "_snap.shp"
        if not os.path.exists(gpx_selected):
            origGPXshape = dir_routes_old + "\\old\\" + IwFolder + "\\gpx_selected_" + IwFolder + "_s" + str(s) + "_snap.shp"
            arcpy.Copy_management(origGPXshape, gpx_selected)
            print "gpx_selected_" + IwFolder + "_s" + str(s) + "_snap.shp" + " is copied"

        arcpy.CheckOutExtension("Network")

        # IdList will help to find whether there is a route. Route will be made from two continuous number (i.e. 61 and 62)
        IdList= []
        rows = arcpy.SearchCursor(gpx_selected) 
        for row in rows:
            ID = row.trkpt_ORDI
            IdList.append(ID)
        del rows
        print IdList
     
        if len(IdList) > 1 :
            mergeList = []
            for t in range(0,len(IdList)-1):
                # create temp folder
                mypath = dir_routes + "\\" + IwFolder + "\\s" + str(s) + "\\temp" + str(int(IdList[t]))
                if not os.path.isdir(mypath):
                    os.makedirs(mypath)
                
                # select 2 gpx points to create individual routes
                tempLayer = mypath + "\\" + IwDate + "_gpx_snap_" + str(int(IdList[t])) + ".shp"
                if IdList[t+1] == IdList[t] + 1:
                    whereclause = '"trkpt_ORDI" = ' + str(IdList[t]) + ' OR "trkpt_ORDI" = ' + str(IdList[t+1])
                    if os.path.isfile(tempLayer):
                        arcpy.Delete_management(tempLayer)
                    arcpy.Select_analysis(gpx_selected, tempLayer, whereclause)

                    # create route
                    route_name = mypath + "\\route" + str(int(IdList[t]))
                    if os.path.isfile(route_name):
                        arcpy.Delete_management(route_name)
                    arcpy.MakeRouteLayer_na(road_network,route_name,"Length")

                    # create table view
                    gpx_table =  mypath + "\\" + IwDate + "_gpx_snap_" + str(int(IdList[t])) + ".dbf"
                    table_view = mypath + "\\tview" + str(int(IdList[t]))
                    if os.path.isfile(table_view):
                        arcpy.Delete_management(table_view)
                    arcpy.MakeTableView_management(gpx_table, table_view)

                    # add location to the route and make route
                    arcpy.AddLocations_na(route_name,"Stops",table_view,"","", "trkpt_ORDI")
                    failed = 0
                    try:
                        arcpy.Solve_na(route_name)
                    except:
                        print "solve:" + str(int(IdList[t])) + " is failed"
                        failed = 1
                                                          
                    if failed != 1:
                        # save route as layer
                        out_route = mypath + "\\route_test" + str(int(IdList[t])) + ".lyr"
                        if os.path.isfile(out_route):
                            arcpy.Delete_management(out_route)
                        arcpy.SaveToLayerFile_management(route_name, out_route, "ABSOLUTE")

                        # export Route layer to shapefile 
                        merge_route = mypath + "\\route" + str(int(IdList[t])) + "\\Routes"
                        output_features_class = mypath + "\\merge" + str(int(IdList[t])) + ".shp" 
                        out_coordinate_system = coordinate_file
                        arcpy.Project_management(merge_route, output_features_class, out_coordinate_system)
                        
                        # add field in Route shapefile
                        arcpy.AddField_management(output_features_class, "gpxID", "LONG", 9)
                        
                        # update field in Route shapefile
                        uprows = arcpy.UpdateCursor(output_features_class) 
                        for uprow in uprows:
                            uprow.gpxID = int(IdList[t])
                            uprows.updateRow(uprow) 
                        del uprows

                        # put output in the merge list
                        mergeList.append(output_features_class)

                        print "route" + str(int(IdList[t])) + " is completed"

                else:
                    print "gpx" + str(IdList[t]) + " does not have routes"

            for t in range(0,len(IdList)-1):
                mypath = dir_routes + "\\" + IwFolder + "\\s" + str(s) + "\\temp" + str(int(IdList[t]))
                if IdList[t+1] == IdList[t] + 1:
                    gpx_table =  mypath + "\\" + IwDate + "_gpx_snap_" + str(int(IdList[t])) + ".dbf"
                    table_view = mypath + "\\tview" + str(int(IdList[t]))
                    # delete temp files
                    arcpy.Delete_management(gpx_table)
                    arcpy.Delete_management(table_view)
            print "temp view are deleted"

            # merge all files
            outLayer = dir_routes + "\\" + IwFolder + "\\out_" + IwerID + "_" + IwDate + "_identify" + "_s" + str(s+1) + ".shp"
            if os.path.isfile(outLayer):
                arcpy.Delete_management(outLayer)
            if len(mergeList) != 0:
                arcpy.Merge_management(mergeList, outLayer)
                print "merge is done"

                # Calculate the closet distance from road with address matched
                origshape = dir_routes + "\\" + IwFolder+ "\\sample" + IwerID + "_" + IwDate[4:8] + "_s" + str(s+1) + ".shp"
                select_equation = "\"vSamplelin\" = '" + SectionList[s] + "'"
                print select_equation
                arcpy.Select_analysis(allhouses, origshape, select_equation)
                inshape = dir_routes + "\\" + IwFolder+ "\\sample" + IwerID + "_" + IwDate[4:8] + "_s" + str(s+1) + "_prj.shp"
                arcpy.Project_management(origshape, inshape, coordinate_file)
                outtbl = dir_routes + "\\" + IwFolder + "\\tbl_" + IwFolder + "_s" + str(s+1)
                outshape = dir_routes + "\\" + IwFolder + "\\samplehouse_spjoin_routes_" + IwFolder + "_s" + str(s+1) + ".shp"
                joinshape= dir_addrfeat + "\\addrfeat_selected_" + d_Sec[SectionList[s]]['state'] + d_Sec[SectionList[s]]['county'] + "_identify.shp"

                # Below only looks 3 closest point within 1000 Meters 
                arcpy.GenerateNearTable_analysis(inshape, joinshape, outtbl, "1000 Meters", "LOCATION", "ANGLE", "ALL", 10)
                # find matched address and location
                addressD = {}
                tblrows = arcpy.SearchCursor(outtbl)
                for tr in tblrows:
                    key = tr.Rowid
                    if key not in addressD:
                        addressD[key]= {"IN_FID":'', "IN_STNAME":'', "NEAR_FID":'', "NEAR_DIST":0,
                                        "NEAR_X":0, "NEAR_Y":0,"NEAR_ANGLE":0,"NEAR_STNAME":'',
                                        "vSamplelineId":'', "vLine_num":'',"lat":0, "long":0,
                                        "combineID":'', "sampled":0, "callnote":'',"percent":0, "matched":0}
                    addressD[key]["IN_FID"] = tr.IN_FID
                    addressD[key]["NEAR_FID"] = tr.NEAR_FID
                    addressD[key]["NEAR_DIST"] = tr.NEAR_DIST
                    addressD[key]["NEAR_X"] = tr.NEAR_X
                    addressD[key]["NEAR_Y"] = tr.NEAR_Y
                    addressD[key]["NEAR_ANGLE"] = tr.NEAR_ANGLE 
                del tblrows
                houserows = arcpy.SearchCursor(inshape)
                for hr in houserows:
                    IN_FID = hr.FID 
                    for key in addressD.keys():
                        if IN_FID == addressD[key]["IN_FID"]:
                            addressD[key]["IN_STNAME"] = hr.vStreet_Na
                            addressD[key]["vSamplelineId"] = hr.vsamplelin
                            addressD[key]["vLine_num"] = hr.vLine_num
                            addressD[key]["lat"] = hr.nLatitude
                            addressD[key]["long"] = hr.nlongitude
                            addressD[key]["combineID"] = hr.combineID
                            addressD[key]["sampled"] = hr.Sampled
                del houserows
                roadrows = arcpy.SearchCursor(joinshape)
                for rr in roadrows:
                    NEAR_FID = rr.FID 
                    for key in addressD.keys():
                        if NEAR_FID == addressD[key]["NEAR_FID"]:
                            addressD[key]["NEAR_STNAME"] = rr.FULLNAME
                del roadrows
                matchedD = {}
                for key in addressD.keys():
                    inaddr = addressD[key]["IN_STNAME"].upper()
                    nearaddr = addressD[key]["NEAR_STNAME"].upper()
                    if inaddr in nearaddr:
                        if addressD[key]["IN_FID"] not in matchedD:
                           matchedD[addressD[key]["IN_FID"]] = [[addressD[key]["NEAR_FID"], addressD[key]["NEAR_DIST"]]]
                        matchedD[addressD[key]["IN_FID"]].append([addressD[key]["NEAR_FID"], addressD[key]["NEAR_DIST"]])
                print len(matchedD)
                notmatchedAtAll = {}
                for key in matchedD.keys():
                    for k in addressD.keys():
                        if addressD[k]["IN_FID"] not in matchedD:
                            if addressD[k]["IN_FID"] not in notmatchedAtAll:
                                notmatchedAtAll[addressD[k]["IN_FID"]] = [[addressD[k]["NEAR_FID"], addressD[k]["NEAR_DIST"]]]
                            notmatchedAtAll[addressD[k]["IN_FID"]].append([addressD[k]["NEAR_FID"], addressD[k]["NEAR_DIST"]])
                for key in matchedD.keys():
                    matchedD[key] = sorted(matchedD[key], key=lambda k: k[1])
                    for k in addressD.keys():
                        if len(matchedD[key]) > 1:
                            if key == addressD[k]["IN_FID"] and matchedD[key][0][0] == addressD[k]["NEAR_FID"]:
                                addressD[k]["matched"] = '2'
                        elif len(matchedD[key]) == 1:
                            addressD[k]["matched"] = '1'
                        else:
                            addressD[k]["matched"] = 'none'

                arcpy.AddField_management(outtbl, "IN_STNAME", "TEXT")
                arcpy.AddField_management(outtbl, "NEAR_STNAME", "TEXT")
                arcpy.AddField_management(outtbl, "VSAMPLELINEID", "TEXT")
                arcpy.AddField_management(outtbl, "VLINE_NUM", "TEXT")
                arcpy.AddField_management(outtbl, "LAT", "DOUBLE")
                arcpy.AddField_management(outtbl, "LONG", "DOUBLE")
                arcpy.AddField_management(outtbl, "COMBINEID", "TEXT")
                arcpy.AddField_management(outtbl, "SAMPLED", "LONG")
                arcpy.AddField_management(outtbl, "CALLNOTE", "TEXT")
                arcpy.AddField_management(outtbl, "MATCHED", "TEXT")
                outtblrows = arcpy.UpdateCursor(outtbl)
                for outtr in outtblrows:
                    for key in addressD.keys():
                        if key == outtr.Rowid:
                            outtr.IN_STNAME = addressD[key]["IN_STNAME"]
                            outtr.NEAR_STNAME = addressD[key]["NEAR_STNAME"]
                            outtr.vSamplelineId = addressD[key]["vSamplelineId"]
                            outtr.vLine_num = addressD[key]["vLine_num"]
                            outtr.lat = addressD[key]["lat"]
                            outtr.long = addressD[key]["long"]
                            outtr.combineID = addressD[key]["combineID"]
                            outtr.sampled = addressD[key]["sampled"]
                            outtr.matched = addressD[key]["matched"]
                            outtblrows.updateRow(outtr)
                del outtblrows
                uptblrows = arcpy.UpdateCursor(outtbl)
                for uptr in uptblrows:
                    for c in dCall.keys():
                        combineid = dCall[c]['vSamplelineId'][0:11]
                        if combineid == uptr.combineID:
                            uptr.callnote = IwerID + "_" + IwDate[4:8]
                            uptblrows.updateRow(uptr)
##                del uptr
                del uptblrows
                print "address matched tbl is created"
                # Select only matched address and update NEAR_DIST field
                out_tblLayer = dir_routes + "\\" + IwFolder + "\\tbl_" + IwFolder + "_s" + str(s+1) + "_s_layer"
                arcpy.MakeXYEventLayer_management(outtbl, "NEAR_X", "NEAR_Y", out_tblLayer)
                houseonaddr = dir_routes + "\\" + IwFolder + "\\pntaddr_" + IwFolder + "_s" + str(s+1) + ".shp"
                arcpy.FeatureToPoint_management(out_tblLayer, houseonaddr, "CENTROID")
                whereclause = '"MATCHED" = \'1\' OR "MATCHED" = \'2\' '
                houseonaddr_matched = dir_routes + "\\" + IwFolder + "\\pntaddr_" + IwFolder + "_s" + str(s+1) + "_matched.shp"
                arcpy.Select_analysis(houseonaddr, houseonaddr_matched, whereclause)
                print "house points on road are created"

                # Spatial join between each house and each route
                mergeList = []
                matchrows = arcpy.SearchCursor(houseonaddr_matched) 
                for matchrow in matchrows: 
                    FID = matchrow.FID
                    Sampled = matchrow.SAMPLED
                    if Sampled == 1:
                        jointemp = dir_routes + "\\" + IwFolder + "\\join" + str(FID) + ".shp"
                        outtemp = dir_routes + "\\" + IwFolder + "\\out" + str(FID) + ".shp"
                        whereclause = '"FID" = ' + str(FID)
                        print whereclause
                        arcpy.Select_analysis (houseonaddr_matched, jointemp, whereclause)
                        arcpy.SpatialJoin_analysis(jointemp, outLayer, outtemp, "JOIN_ONE_TO_MANY", "KEEP_ALL", "", "INTERSECT")
                        mergeList.append(outtemp)
                        arcpy.Delete_management(jointemp)
                del matchrows

                if notmatchedAtAll != {}:
                    ############# Export as CSV from Dictionary ######################
                    f = open(dir_routes + "\\" + IwFolder + "\\notmatched" + str(s+1) + ".csv", 'wb')
                    cw = csv.writer(f, delimiter=',')
                    ##### Write headers
                    cw.writerow(['key'])
                    ##### Write data
                    for k in notmatchedAtAll.keys():
                        cw.writerow([k])
    
                # Merge all spatial join information between house and route
                if len(mergeList) != 0 :
                    arcpy.Merge_management(mergeList, outshape)
                for m in range(0,len(mergeList)):
                    arcpy.Delete_management(mergeList[m])
                print "outtemp is deleted"
                arcpy.Delete_management(inshape)
                arcpy.Delete_management(houseonaddr)
                arcpy.Delete_management(houseonaddr_matched)

        else:
            print "No GPX exists within selected blocks:" + IwFolder + ", " + SectionList[s]
                    
    print str(datetime.datetime.now())
    print "done folder:" + IwFolder
    
print "All done"
