process_kml

#! /usr/bin/env python
"""
Process a KML file downloaded from Google Maps Engine Lite and prepare it for
use on Maverick by:
- Downloading images and placing them in a 'photos' directory
"""

from __future__ import absolute_import, print_function

import argparse
import codecs
import os
import shutil
import urllib.parse
import urllib.request
import zipfile

from lxml import etree, objectify
from pykml import parser as kml_parser

# Element factory used to build new KML nodes. The namespace and nsmap are
# left unset to prevent adding a namespace to every new element.
KML = objectify.ElementMaker(annotate=False, namespace=None, nsmap=None)


# Tag expressions whose text is wrapped in CDATA when the document is saved.
# Each entry is appended to '//kml:' to form an XPath expression.
TEXT_ELEMENTS = [
    'description',
    'text',
    'linkDescription',
    'displayName',
    'Data/*',
]


def get_xml_with_cdata(obj, cdata_elements):
    """Return an lxml.etree copy of the document with CDATA text sections.

    :param obj: Root element of the parsed KML document.
    :param cdata_elements: List of tag expressions; each one is appended to
        ``//kml:`` to build the XPath that selects the elements to wrap.
    :returns: Root ``lxml.etree`` element of the re-parsed document.
    """
    # Serialize and re-parse to get a plain lxml.etree tree from the
    # objectify document.
    root = etree.fromstring(etree.tostring(etree.ElementTree(obj)))

    # An empty union is not a valid XPath expression; nothing to wrap.
    if not cdata_elements:
        return root

    # Union of all the requested tag expressions.
    xpath = '|'.join('//kml:' + tag for tag in cdata_elements)

    results = root.xpath(
        xpath,
        namespaces={'kml': 'http://www.opengis.net/kml/2.2'},
    )

    for element in results:
        # etree.CDATA() does not accept None, so elements without text are
        # left untouched.
        if element.text is not None:
            element.text = etree.CDATA(element.text)

    return root


def get_parsed_file(filename):
    """
    Parse the KML file at the given path and return its root element.

    :param filename: Path to the KML file to parse.
    :returns: Root element of the parsed document.
    """
    # Binary mode lets the XML parser honour the encoding declared in the
    # document; the context manager closes the handle once parsing is done.
    with open(filename, 'rb') as kml_file:
        return kml_parser.parse(kml_file).getroot()


def check_for_dir(path):
    """
    Make sure the given path exists, creating the directories if needed.

    :param path: Path to check.
    :returns: The same path that was given.
    """
    if os.path.exists(path):
        return path

    os.makedirs(path)
    return path


def download_pic(url, img_name, dest_dirs):
    """
    Download an image from the given url and store it in the given dirs.

    The image is downloaded into the first directory (unless a file with the
    same name is already there) and then copied to the remaining ones.

    :param url: URL to the image to download. If it contains spaces, only
        the part before the first space is used.
    :param img_name: Name to use to store the file (without extension).
    :param dest_dirs: List of directories to store the image.
    :returns: The file name (name plus extension) used to store the image,
        whether or not the download succeeded.
    """
    # Only the part before the first space is used as the URL.
    url = url.split(' ')[0]

    path = urllib.parse.urlparse(url).path
    # splitext() keeps the leading dot; strip it so the name built below
    # does not end up with two dots ('name..jpg').
    extension = os.path.splitext(path)[1].lstrip('.')

    # TODO: Infer filetype.
    if '?' in url or not extension:
        # Lets assume it's 'jpg'
        extension = 'jpg'

    # Path separators in the name would point outside the destination
    # directory (or to a missing sub-directory), so replace them.
    safe_name = '{0}'.format(img_name)
    for separator in (os.sep, os.altsep):
        if separator:
            safe_name = safe_name.replace(separator, '_')

    filename = '{0}.{1}'.format(safe_name, extension)

    first_dir = dest_dirs[0]
    filepath = os.path.join(first_dir, filename)

    # If the file already exists, don't download it again
    if not os.path.isfile(filepath):
        print(u'Downloading: {0}'.format(url))

        try:
            urllib.request.urlretrieve(url, filepath)
        except urllib.error.URLError:
            print('Image not found.')

    if os.path.isfile(filepath):
        # If there are more directories, copy the image to them.
        for other_dir in dest_dirs[1:]:
            shutil.copy2(filepath, os.path.join(other_dir, filename))

    return filename


def parse_args():
    """Build the command line parser and return the parsed arguments."""

    cli = argparse.ArgumentParser(
        description='Download images referenced in a KML file',
    )

    # Optional switch: also produce the Maverick output directory.
    cli.add_argument(
        '--maverick',
        action='store_true',
        help='Save pictures in a `photos` directory.',
    )

    # One or more files to process.
    cli.add_argument(
        'input_files',
        nargs='+',
        default='',
        help='Path to files to process',
    )

    return cli.parse_args()


def init_maverick(filepath, filename):
    """Create the Maverick `photos` output directory and return its path."""

    base_dir = os.path.dirname(filepath)
    photos_dir = os.path.join(base_dir, '%s_maverick' % filename, 'photos')

    return check_for_dir(photos_dir)


def init_kmz(filepath, filename):
    """Return the path of the directory that holds the KMZ content.

    The directory itself is not created here.
    """

    parent_dir = os.path.dirname(filepath)
    kmz_dirname = '%s_kmz' % filename

    return os.path.join(parent_dir, kmz_dirname)


def process_placemark(placemark, dirs):
    """Download the images linked from a placemark and embed them in it.

    For every ``gx_media_links`` entry in the placemark's ExtendedData the
    image is downloaded and a ``pictures`` data entry holding an ``<img>``
    tag is appended. The original description is stored in a ``notes`` data
    entry, and the description is replaced by the last ``<img>`` tag
    followed by the original description text.

    Placemarks without ExtendedData are left untouched.

    :param placemark: Placemark element of the parsed KML document.
    :param dirs: List of directories where the images are stored.
    """

    name = placemark.name.text
    print(name)

    try:
        extended_data = placemark.ExtendedData
    except AttributeError:
        return

    media_elements = extended_data.findall(
        './/*[@name="gx_media_links"]'
    )

    # Stays empty when the placemark has no media links, so the description
    # can still be rebuilt below.
    img = ''

    for media in media_elements:
        url = media.value.text
        print(url)

        # Nothing to download for an empty media entry.
        if not url:
            continue

        picname = download_pic(url, name, dirs)

        # Add new data.
        img = '<img src="photos/{filename}">'.format(
            filename=picname,
        )
        extended_data.append(
            KML.Data(KML.value(img), name='pictures'))

    # The description is optional; fall back to an empty text when the
    # element is missing or has no content.
    description = getattr(placemark, 'description', None)
    desc_text = ''
    if description is not None and description.text is not None:
        desc_text = description.text

    extended_data.append(
        KML.Data(KML.value(desc_text), name='notes'))

    placemark.description = KML.description('{img}{desc}'.format(
        img=img,
        desc=desc_text,
    ))


def zipdir(filename, path):
    """Zip the content of a directory.

    Entries are stored with names relative to ``path``, so files located
    directly in the directory end up at the root of the archive.

    :param filename: Path of the zip file to create.
    :param path: Directory whose content is added to the archive.
    """

    with zipfile.ZipFile(filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, _dirs, files in os.walk(path):
            for entry in files:
                full_path = os.path.join(root, entry)
                # Without an explicit archive name the entry would be
                # stored under its full filesystem path.
                zipf.write(full_path, os.path.relpath(full_path, path))


def kmz_sequence_name(filename):
    """Return the first `<filename><N>.kmz` path that does not exist yet.

    N starts at 0 and is increased until an unused name is found.
    """

    index = 0

    while True:
        candidate = '%s%d.kmz' % (filename, index)

        if not os.path.exists(candidate):
            return candidate

        index += 1


def main():
    """
    Main application

    For every input file given on the command line:
    - skip it unless it has a `.kml` or `.kmz` extension,
    - copy (KML) or extract (KMZ) it into a `<name>_kmz` directory,
    - download the pictures referenced by its placemarks,
    - rewrite `doc.kml` with CDATA text sections,
    - zip the directory into a new sequenced `.kmz` file.
    """

    args = parse_args()

    for filepath in args.input_files:
        filepath = os.path.abspath(filepath)
        # `filename` is the absolute path without the extension.
        filename, extension = os.path.splitext(filepath)
        extension = extension.lower()

        if extension not in ('.kml', '.kmz'):
            print('Invalid file type. Skipping.')
            continue

        kmz_dir = init_kmz(filepath, filename)
        kmz_photos_dir = check_for_dir(os.path.join(kmz_dir, 'photos'))

        dirs = [kmz_photos_dir]

        if args.maverick:
            dirs.append(init_maverick(filepath, filename))

        if extension == '.kml':
            # Copy KML file.
            shutil.copy2(filepath, os.path.join(kmz_dir, 'doc.kml'))
        else:
            # Extract files from KMZ; the archive is closed afterwards.
            with zipfile.ZipFile(filepath) as kmz_instance:
                kmz_instance.extractall(kmz_dir)

        kmz_filepath = os.path.join(kmz_dir, 'doc.kml')
        doc = get_parsed_file(kmz_filepath)

        print('Downloading pictures for {0}'.format(filepath))

        # Placemarks are looked up in the first folder, then directly under
        # the document.
        try:
            placemarks = doc.Document.Folder.Placemark
        except AttributeError:
            try:
                placemarks = doc.Document.Placemark
            except AttributeError:
                print('No placemarks found.')
                placemarks = []

        for placemark in placemarks:
            process_placemark(placemark, dirs)

        # Render the document before opening the file for writing, so a
        # rendering error does not leave a truncated doc.kml behind.
        kmlobj_with_cdata = get_xml_with_cdata(doc, TEXT_ELEMENTS)
        rendered = etree.tostring(
            kmlobj_with_cdata, pretty_print=True,
        ).decode('utf-8')

        # Save KMZ file.
        with codecs.open(kmz_filepath, 'w', 'utf-8') as kmz_file:
            kmz_file.write(rendered)

        zipdir(kmz_sequence_name(filename), kmz_dir)


# Run the processing only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
    print('Process finished.')