# Convert LA building footprints and addresses into importable OSM files.
from lxml import etree
from lxml.etree import tostring
from shapely.geometry import asShape, Point, LineString
from sys import argv, exit, stderr
from glob import glob
from merge import merge
import re
from decimal import Decimal, getcontext
from multiprocessing import Pool
import json
from rtree import index
import ntpath
import osm_tags

debug = True

# Adjust precision for buffer operations
getcontext().prec = 16

# Converts given buildings into corresponding OSM XML files.
def convert(buildingsFile, osmOut):
    with open(buildingsFile) as f:
        features = json.load(f)
    allAddresses = {}
    buildings = []
    buildingShapes = []
    buildingIdx = index.Index()

    # Returns the coordinates for this address
    def keyFromAddress(address):
        return str(address['geometry']['coordinates'][0]) + "," + str(address['geometry']['coordinates'][1])

    for feature in features:
        if feature['geometry']['type'] == 'Polygon' or feature['geometry']['type'] == 'MultiPolygon':
            extra_tags = osm_tags.get_osm_tags(feature)
            feature['properties']['osm'] = extra_tags
            buildings.append(feature)
            shape = asShape(feature['geometry'])
            buildingShapes.append(shape)
            buildingIdx.add(len(buildingShapes) - 1, shape.bounds)

        # These are the addresses that don't overlap any buildings
        elif feature['geometry']['type'] == 'Point':
            # The key is the coordinates of this address. Track how many addresses share these coords.
            key = keyFromAddress(feature)
            if key in allAddresses:
                allAddresses[key].append(feature)
            else:
                allAddresses[key] = [feature]

        else:
            print "geometry of unknown type:", feature['geometry']['type']

    # Generates a new osm id.
    osmIds = dict(node = -1, way = -1, rel = -1)
    def newOsmId(type):
        osmIds[type] = osmIds[type] - 1
        return osmIds[type]

    ## Formats multi part house numbers
    def formatHousenumber(p):
        def suffix(part1, part2, hyphen_type=None):
            #part1 = stripZeroes(part1)
            if not part2:
                return str(part1)
            #part2 = stripZeroes(part2)
            return str(part1) + ' ' + str(part2)
        #def stripZeroes(addr): # strip leading zeroes from numbers
        #    if addr.isdigit():
        #        addr = str(int(addr))
        #    if '-' in addr:
        #        try:
        #            addr2 = addr.split('-')
        #            if len(addr2) == 2:
        #                addr = str(int(addr2[0])) + '-' + str(int(addr2[1])).zfill(2)
        #        except:
        #            pass
        #    return addr
        number = suffix(p['Number'], p['NumSuffix'])
        if p['NumPrefix']:
            number = p['NumPrefix'] + number
        return number

    # Converts an address
    def convertAddress(address):
        result = dict()
        if all (k in address for k in ('Number', 'StreetName')):
            if address['Number']:
                result['addr:housenumber'] = formatHousenumber(address)
            if address['StreetName']:

                # Titlecase
                streetname = address['StreetName'].title()
                if address['StArticle']:
                    streetname = address['StArticle'].title() + " " + streetname
                if address['PreType']:
                    streetname = address['PreType'].title() + " " + streetname
                if address['PreDir']:
                    streetname = address['PreDir'].title() + " " + streetname
                if address['PreMod']:
                    streetname = address['PreMod'].title() + " " + streetname
                if address['PostType']:
                    streetname = streetname + " " + address['PostType'].title()
                if address['PostDir']:
                    streetname = streetname + " " + address['PostDir'].title()
                if address['PostMod']:
                    streetname = streetname + " " + address['PostMod'].title()

                # Fix titlecase on 1St, 2Nd, 3Rd, 4Th, etc
                streetname = re.sub(r"(.*)(\d+)St\s*(.*)", r"\1\2st \3", streetname)
                streetname = re.sub(r"(.*)(\d+)Nd\s*(.*)", r"\1\2nd \3", streetname)
                streetname = re.sub(r"(.*)(\d+)Rd\s*(.*)", r"\1\2rd \3", streetname)
                streetname = re.sub(r"(.*)(\d+)Th\s*(.*)", r"\1\2th \3", streetname)

                # Expand 'St ' -> 'Saint'
                # relevant for:
                #   'St Clair'
                #   'St Louis'
                #   'St James'
                #   'St James Park'
                #   'St Andrews'
                #   'St Nicolas'
                #   'St Cloud'
                #   'St Ambrose'
                #   'St Bonaventure'
                #   'St Joseph'
                #   'St Tropez'
                if streetname[0:3] == 'St ': streetname = 'Saint ' + streetname[3:]
                # Middle name expansions
                streetname = streetname.replace(' St ', ' Street ')
                streetname = streetname.replace(' Rd ', ' Road ')
                streetname = streetname.replace(' Blvd ', ' Boulevard ')
                result['addr:street'] = streetname
            if address['PCITY1']:
                result['addr:city'] = address['PCITY1'].title()
            elif address['LegalComm']:
                result['addr:city'] = address['LegalComm'].title()
            if address['ZipCode']:
                result['addr:postcode'] = str(int(address['ZipCode']))
            if address['UnitName']:
                result['addr:unit'] = address['UnitName']
        return result

    # Distills coincident addresses into one address where possible.
    # Takes an array of addresses and returns an array of 1 or more addresses
    def distillAddresses(addresses):
        # Only distill addresses if the following conditions are true:
        # 1) the addresses share the same coordinates.
        # AND
        # 2a) all the attributes are the same _except_ the unit number/name
        # OR
        # 2b) the street number is the same but the street names are referring to the same thing

        outputAddresses = []

        # First, group the addresses into separate lists for each unique location
        addressesByCoords = {}
        for address in addresses:
            key = keyFromAddress(address)
            if key in addressesByCoords:
                addressesByCoords[key].append(address)
            else:
                addressesByCoords[key] = [address]

        # loop over unique coordinates
        for key in addressesByCoords:
            # Here see if we can collapse any of these addresses at the same coords.

            # addressesByCoords[key] is an array of addresses at this location.

            # We are only looking for the 2 possibilities above (2a) and (2b).
            # If the situation is more complicated, change nothing.
            outputAddresses.extend(distillAddressesAtPoint(addressesByCoords[key]))

        return outputAddresses

    # This function is called by distillAddresses.
    # It assumes all addresses are at the same coordinates.
    # Returns an array of 1 or more addresses
    def distillAddressesAtPoint(addresses):

        if len(addresses) == 1:
            return addresses

        firstAddress = addresses[0]

        # (2a) If the first address is an apartment, see if all the rest are too.

        # NOTE: sometimes an apartment building has a few address points that lack a UnitName...
        # ...so checking for the presence of UnitName in firstAddress wouldn't always work.
        props = firstAddress['properties']
        if debug: print "Testing to see if these are apartments...", '\t'.join([str(props['Number']), str(props['NumSuffix']), str(props['PreType']), str(props['StreetName']), str(props['PostType']), str(props['UnitName'])])
        # Compare subsequent addresses in the array to the first address.
        # Hence, range starts at 1.
        for i in range(1, len(addresses)):
            if not areSameAddressExceptUnit(firstAddress, addresses[i]):
                props = addresses[i]['properties']
                if debug: print "No, this address was different...........", '\t'.join([str(props['Number']), str(props['NumSuffix']), str(props['PreType']), str(props['StreetName']), str(props['PostType']), str(props['UnitName'])])
                #print firstAddress
                #print addresses[i]
                break
            # else, keep going

        else: # else for the `for` statement. Executes only if `break` never did.
            # We checked them all, and they're all the same except UnitName.
            # In this case the apartment data is useless to OSM because the
            # apartment nodes are all on top of each other.
            # So, discard the unit information and return just one address.
            firstAddress['properties']['UnitName'] = None
            if debug: print "Yes they were apartments! Collapsed", len(addresses), "into one"
            return [firstAddress]

        # (2b) Check if the street number is all the same.
        # For this, we use a list of alternative names (like HWY 1, etc)...
        # ...and we need to know which canonical name to keep.
        if debug: print "Testing to see if the street names are synonyms.."
        canonicalStreetName = None
        for i in range(1, len(addresses)):
            props = addresses[i]['properties']
            if not areSameAddressExceptStreet(firstAddress, addresses[i]):
                if debug: print "No, this address was different...........", '\t'.join([str(props['Number']), str(props['NumSuffix']), str(props['PreType']), str(props['StreetName']), str(props['PostType']), str(props['UnitName'])])
                #print firstAddress
                #print addresses[i]
                break
            compoundStreetName = (str(props['PreType']),str(props['StreetName']),str(props['PostType']))
            currentCanonicalStreetName = getCanonicalName(compoundStreetName)
            if currentCanonicalStreetName:
                if debug: print "found canonical name", currentCanonicalStreetName
                if ((currentCanonicalStreetName == canonicalStreetName) or (canonicalStreetName == None)):
                    canonicalStreetName = currentCanonicalStreetName
                else:
                    if debug: print "canonicalStreetNames didn't match:", canonicalStreetName, currentCanonicalStreetName
                    break
            else:
                print "couldn't find canonicalStreetName for", compoundStreetName
                break

        else: # else for the `for` statement. Executes only if `break` never did.
            # We checked them all, and they're all the same except StreetName.
            # If we can determine that they are all the same synonym, we can
            # overwrite the other streetname information and return just one address.
            firstAddress['properties']['PreType'] = canonicalStreetName[0]
            firstAddress['properties']['StreetName'] = canonicalStreetName[1]
            firstAddress['properties']['PostType'] = canonicalStreetName[2]
            if debug: print "Yes they were synonyms! Collapsed", len(addresses), "into one"
            return [firstAddress]

        # This is only excuted if neither of the two `else` statements executed 
        # for the two `for` statements above. That means we were unable to collapse
        # separate apartments into one, or collapse synonymous street names into one.
        # So, instead of returning just one address, we fail and return all of them.
        return addresses

    def areSameAddressExceptUnit(a1, a2):
        for key in ['NumPrefix', 'Number', 'NumSuffix', 'PreMod', 'PreDir', 'PreType', 'StArticle', 'StreetName', 'PostType', 'PostDir', 'PostMod', 'ZipCode', 'LegalComm', 'PCITY1']:
            if a1['properties'][key] != a2['properties'][key]:
                #print key, a1['properties'][key], "!=", a2['properties'][key]
                return False
        return True

    def areSameAddressExceptStreet(a1, a2):
        for key in ['NumPrefix', 'Number', 'NumSuffix', 'PreMod', 'PreDir', 'StArticle', 'UnitName', 'PostDir', 'PostMod', 'ZipCode', 'LegalComm', 'PCITY1']:
            if a1['properties'][key] != a2['properties'][key]:
                #print key, a1['properties'][key], "!=", a2['properties'][key]
                return False
        return True

    # Sometimes we have identical addresses that differ only by street name.
    # Usually these are because the street name is also a highway. We want to 
    # remove all the highway names and only use the street name for the address
    canonicalNames = {
        ("None", "LINCOLN", "BOULEVARD"): (None, "LINCOLN", "BOULEVARD"),
        ("ROUTE", "1", "None"): (None, "LINCOLN", "BOULEVARD"),
        ("HIGHWAY", "1", "None"): (None, "LINCOLN", "BOULEVARD"),
        ("None", "SR-1", "None"): (None, "LINCOLN", "BOULEVARD"),
        ("None", "PCH", "None"): (None, "LINCOLN", "BOULEVARD"),
    }

    def getCanonicalName(compoundStreetName):
        result = None
        try:
            result = canonicalNames[compoundStreetName]
        except KeyError:
            return None
        return result

    # Appends new node or returns existing if exists.
    nodes = {}
    def appendNewNode(coords, osmXml):
        rlon = int(float(coords[0]*10**7))
        rlat = int(float(coords[1]*10**7))
        if (rlon, rlat) in nodes:
            return nodes[(rlon, rlat)]
        node = etree.Element('node', visible = 'true', id = str(newOsmId('node')))
        node.set('lon', str(Decimal(coords[0])*Decimal(1)))
        node.set('lat', str(Decimal(coords[1])*Decimal(1)))
        nodes[(rlon, rlat)] = node
        osmXml.append(node)
        return node

    # Sometimes we want to force overlapping nodes, such as with addresses.
    # This way they'll show up in JOSM and the contributor can deal with them manually.
    # Otherwise, we might try to apply multiple address tags to the same node...
    # ...which is also incorrect, but harder to detect.
    def appendNewNodeIgnoringExisting(coords, osmXml):
        rlon = int(float(coords[0]*10**7))
        rlat = int(float(coords[1]*10**7))
        #if (rlon, rlat) in nodes:
        #    return nodes[(rlon, rlat)]
        node = etree.Element('node', visible = 'true', id = str(newOsmId('node')))
        node.set('lon', str(Decimal(coords[0])*Decimal(1)))
        node.set('lat', str(Decimal(coords[1])*Decimal(1)))
        nodes[(rlon, rlat)] = node
        osmXml.append(node)
        return node

    def appendNewWay(coords, intersects, osmXml):
        way = etree.Element('way', visible='true', id=str(newOsmId('way')))
        firstNid = 0
        for i, coord in enumerate(coords):
            if i == 0: continue # the first and last coordinate are the same
            node = appendNewNode(coord, osmXml)
            if i == 1: firstNid = node.get('id')
            way.append(etree.Element('nd', ref=node.get('id')))

            # Check each way segment for intersecting nodes
            int_nodes = {}
            try:
                line = LineString([coord, coords[i+1]])
            except IndexError:
                line = LineString([coord, coords[1]])
            for idx, c in enumerate(intersects):
                if line.buffer(0.000001).contains(Point(c[0], c[1])) and c not in coords:
                    t_node = appendNewNode(c, osmXml)
                    for n in way.iter('nd'):
                        if n.get('ref') == t_node.get('id'):
                            break
                    else:
                        int_nodes[t_node.get('id')] = Point(c).distance(Point(coord))
            for n in sorted(int_nodes, key=lambda key: int_nodes[key]): # add intersecting nodes in order
                way.append(etree.Element('nd', ref=n))
            
        way.append(etree.Element('nd', ref=firstNid)) # close way
        osmXml.append(way)
        return way

    # Appends an address to a given node or way.
    def appendAddress(address, element):
    #    # Need to check if these tags already exist on this element
        for k, v in convertAddress(address['properties']).iteritems():
            # TODO: is this doing anything useful?
            #for child in element:
            #    if child.tag == 'tag':
            #        #print k, v
            #        if child.attrib.get('k') == k:
            #            print "found key", k
            #            if child.attrib.get('v') == v:
            #                print "found matching value", v
           element.append(etree.Element('tag', k=k, v=v))

    # Appends a building to a given OSM xml document.
    def appendBuilding(building, shape, address, osmXml):
        # Check for intersecting buildings
        intersects = []
        for i in buildingIdx.intersection(shape.bounds):
            try:
                for c in buildingShapes[i].exterior.coords:
                    if Point(c[0], c[1]).buffer(0.000001).intersects(shape):
                        intersects.append(c)
            except AttributeError:
                for c in buildingShapes[i][0].exterior.coords:
                    if Point(c[0], c[1]).buffer(0.000001).intersects(shape):
                        intersects.append(c)

        # Export building, create multipolygon if there are interior shapes.
        interiors = []
        try:
            way = appendNewWay(list(shape.exterior.coords), intersects, osmXml)
            for interior in shape.interiors:
                interiors.append(appendNewWay(list(interior.coords), [], osmXml))
        except AttributeError:
            way = appendNewWay(list(shape[0].exterior.coords), intersects, osmXml)
            for interior in shape[0].interiors:
                interiors.append(appendNewWay(list(interior.coords), [], osmXml))
        if len(interiors) > 0:
            relation = etree.Element('relation', visible='true', id=str(newOsmId('way')))
            relation.append(etree.Element('member', type='way', role='outer', ref=way.get('id')))
            for interior in interiors:
                relation.append(etree.Element('member', type='way', role='inner', ref=interior.get('id')))
            relation.append(etree.Element('tag', k='type', v='multipolygon'))
            osmXml.append(relation)
            way = relation
        for tag in building['properties']['osm']:
            value = building['properties']['osm'][tag]
            way.append(etree.Element('tag', k=tag, v=value))
        # if 'GeneralUse' in building['properties']:
        #     way.append(etree.Element('tag', k='building', v=building['properties']['GeneralUse']))
        # else:
        #     way.append(etree.Element('tag', k='building', v='yes'))
        # if 'SpecificUs' in building['properties']:
        #     way.append(etree.Element('tag', k='building:use', v=building['properties']['GeneralUse']))
        if 'YearBuilt' in building['properties'] and building['properties']['YearBuilt'] is not None:
            YearBuilt = int(building['properties']['YearBuilt'])
            if YearBuilt > 0:
                    way.append(etree.Element('tag', k='start_date', v=str(YearBuilt)))
        # if 'Specific_1' in building['properties']:
        #         way.append(etree.Element('tag', k='amenity', v=building['properties']['Specific_1']))
        if 'Units' in building['properties'] and building['properties']['Units'] is not None:
            units = int(round(float(building['properties']['Units']), 0))
            if units > 0:
                way.append(etree.Element('tag', k='building:units', v=str(units)))
        if 'HEIGHT' in building['properties']:
            height = round(((building['properties']['HEIGHT'] * 12) * 0.0254), 1)
            if height > 0:
                way.append(etree.Element('tag', k='height', v=str(height)))
        if 'ELEV' in building['properties']:
            elevation = round(((building['properties']['ELEV'] * 12) * 0.0254), 1)
            if elevation > 0:
                way.append(etree.Element('tag', k='ele', v=str(elevation)))
        if 'BLD_ID' in building['properties']:
            way.append(etree.Element('tag', k='lacounty:bld_id', v=str(building['properties']['BLD_ID'])))
        if 'AIN' in building['properties'] and building['properties']['AIN'] is not None:
            way.append(etree.Element('tag', k='lacounty:ain', v=str(building['properties']['AIN'])))
#        if address:
#            appendAddress(address, way)

    # Export buildings & addresses. Only export address with building if there is exactly
    # one address per building. Export remaining addresses as individual nodes.
    # The remaining addresses are added to a dictionary hashed by their coordinates.
    # This way we catch any addresses that have the same coordinates.
    osmXml = etree.Element('osm', version='0.6', generator='[email protected]')
    for i in range(0, len(buildings)):

        buildingAddresses = []
        for address in buildings[i]['properties']['addresses']:
            buildingAddresses.append(address)
        address = None
        if len(buildingAddresses) == 1:
            # There's only one address in the building footprint
            address = buildingAddresses[0]
        elif len(buildingAddresses) > 1:
            # If there are multiple addresses, first try to distill them.
            # If we can distill them to one address, we can still add it to this building.
            distilledAddresses = distillAddresses(buildingAddresses)
            if len(distilledAddresses) == 1:
                # We distilled down to one address. Add it to the building.
                address = distilledAddresses[0]
            else:
                # We could not distilled down to one address. Instead export as nodes.
                for address in distilledAddresses:
                    # The key is the coordinates of this address. Track how many addresses share these coords.
                    key = keyFromAddress(address)
                    if key in allAddresses:
                        allAddresses[key].append(address)
                    else:
                        allAddresses[key] = [address]

        appendBuilding(buildings[i], buildingShapes[i], address, osmXml)


    # Export any addresses that aren't the only address for a building.
    if (len(allAddresses) > 0):

        # Iterate over the list of distinct coordinates found in the address data
        for coordskey in allAddresses:
            # if a distinct coordinate has only one associated address,
            # then export that address as a new node
            if len(allAddresses[coordskey]) == 1:
                address = allAddresses[coordskey][0]
                coordinates = address['geometry']['coordinates']
#                node = appendNewNode(coordinates, osmXml) # returns old node if one exists at these coords
#                appendAddress(address, node)

            # If there is more than one address at these coordinates, do something.
            # ...but do what exactly?
            else:
                distilledAddresses = distillAddresses(allAddresses[coordskey])
                if len(distilledAddresses) == 1:
                    # We distilled down to one address. Append it.
                    address = distilledAddresses[0]
                    coordinates = address['geometry']['coordinates']
#                    node = appendNewNode(coordinates, osmXml) # returns old node if one exists at these coords
#                    appendAddress(address, node)
                else:
                    if debug: print "found duplicate coordinates that could not be distilled:", coordskey, "has", len(allAddresses[coordskey]), "addresses"
                    if debug: print '\t'.join(["num", "numsufx", "pretype", "street", "posttype", "unit"])
                    for address in distilledAddresses:
                        # TODO: do something smart here. These are overlapping addresses that we couldn't distill.
                        # TODO: maybe jitter them, or leave stacked but with FIXME?
                        # TODO: For now, we use appendNewNodeIgnoringExisting to pile the nodes on top of each other.
                        #print address
                        props = address['properties']
                        if debug: print '\t'.join([str(props['Number']), str(props['NumSuffix']), str(props['PreType']), str(props['StreetName']), str(props['PostType']), str(props['UnitName'])])
                        coordinates = address['geometry']['coordinates']
#                        node = appendNewNodeIgnoringExisting(coordinates, osmXml) # Force overlapping nodes so JOSM will catch them
#                        appendAddress(address, node)

    with open(osmOut, 'w') as outFile:
        outFile.writelines(tostring(osmXml, pretty_print=True, xml_declaration=True, encoding='UTF-8'))
        print 'Exported ' + osmOut

def prep(fil3):
    matches = re.match('^(.*)\..*?$', ntpath.basename(fil3)).groups(0)
    convert(fil3, 'osm/%s.osm' % matches[0])

if __name__ == '__main__':
    # for easier debugging
    for filename in argv[1:]:
        prep(filename)
    # pool = Pool()
    # pool.map(prep, argv[1:])
    # pool.close()
    # pool.join()