#!/usr/bin/env python
"""Publish a CSV file to a Google Spreadsheet sheet.

This is a convenient script to update an existing Google Spreadsheet document
with the contents of a CSV file.

For example:

  upload-csv-to-google-sheet 19cwo....XFtgM apples.csv oranges.csv

By default, this updates (overwrites) the first two worksheets in the
spreadsheet with the contents of the given CSV files. If you want to update
another sheet or index, you can add them to the filenames, like this:

  upload-csv-to-google-sheet 19cwo....XFtgM apples.csv:MySheet oranges.csv:2

This would override the sheet "MySheet" with the contents of "apples.csv" and
the third sheet with the contents of "oranges.csv". Note that the sheets are
1-indexed, not 0-indexed.


You will need to have an installation of the following libraries for this to
work:

 * gdata-python-client
 * oauth2client
 * httplib2

Moreover, you will need to enable the Google Sheets API in the developer console
and download the Client Secrets that Google provides to ~/.google-apis.json. (You
can override this location with the GOOGLE_APIS environment variable.)


Note: This script has to run under Python 2.x unlike the other scripts provided
in this package because gdata (the old "gdata-python-client" XML-based APIs) has
not yet been ported to Python 3, and it looks abandoned by Google in favor of
the discovery apis (i.e., "google-api-python-client").

Beancount is only implemented using Python 3.x, so we will support exporting the
list of holdings to a temporary CSV file and uploading it automatically to a
spreadsheet using a variant of this script. The CSV data format should be enough
of a bridge for us to get our work done.

"""
__copyright__ = "Copyright (C) 2013-2018  Martin Blais"
__license__ = "GNU GPLv2"

import argparse
import bisect
import csv
import codecs
import logging
import os
import re
import unittest
from os import path

import oauth2client.client
from oauth2client import tools
from oauth2client.file import Storage
import httplib2

import gdata.spreadsheets.client as sheets_client
import gdata.spreadsheets.data as sheets_data
import gdata.gauth


#-------------------------------------------------------------------------------
# gauth.py


# Default locations of the client secrets file and of the cached OAuth2
# credentials. The home directory is resolved with expanduser() rather than
# os.environ['HOME'] so that importing this module does not raise a KeyError
# when HOME is unset. The GOOGLE_APIS environment variable, when set, overrides
# the secrets location.
DEFAULT_SECRETS_FILENAME = os.environ.get(
    'GOOGLE_APIS', path.join(path.expanduser('~'), '.google-apis.json'))
DEFAULT_STORAGE_FILENAME = path.join(path.expanduser('~'), '.oauth2-google-api')


def get_argparser(**kwds):
    """Create an argument parser for connecting to the Google APIs.

    The parser inherits the oauth2client tools arguments and adds the
    --secrets and --storage options. Callers may add further arguments to
    the returned parser.

    Args:
      **kwds: Keyword arguments forwarded to argparse.ArgumentParser().
    Returns:
      A suitable ArgumentParser object.
    """
    parser = argparse.ArgumentParser(parents=[tools.argparser], **kwds)

    # (flag, default value, help text) for each option added by this module.
    option_specs = (
        ('--secrets', DEFAULT_SECRETS_FILENAME, "Secrets filename"),
        ('--storage', DEFAULT_STORAGE_FILENAME, "Storage filename"),
    )
    for flag, default_value, help_text in option_specs:
        parser.add_argument(flag, action='store',
                            default=default_value,
                            help=help_text)

    return parser


def get_authenticated_http(scopes, args):
    """Authenticate via OAuth2, caching the credentials in a file.

    When the 'storage' cache file already holds credentials, no user
    interaction is needed and the cached credentials are used. Otherwise the
    oauth2client flow is run so that the user can grant access, and the
    resulting credentials are stored.

    Args:
      scopes: A string or a list of strings, the scopes to get credentials for.
        A list is joined with spaces into a single scope string.
      args: An argparse option values object, as returned by parse_args().
        It must include the 'secrets' and 'storage' attributes defined by
        get_argparser(); it is also handed to the oauth2client flow.
    Returns:
      A pair of (http, credentials): the authorized http client object and the
      credentials object that authorized it.
    """
    # 'secrets' identifies the client application (this is not the
    # credentials/token); 'storage' is where credentials are cached between runs.
    secrets_filename, storage_filename = args.secrets, args.storage

    # Build the flow from the secrets file.
    if isinstance(scopes, list):
        scope = ' '.join(scopes)
    else:
        scope = scopes
    flow = oauth2client.client.flow_from_clientsecrets(secrets_filename, scope)
    flow.redirect_uri = oauth2client.client.OOB_CALLBACK_URN

    # Create the HTTP transport, with httplib2's default settings.
    http = httplib2.Http()

    # Look up previously cached credentials.
    storage = Storage(storage_filename)
    credentials = storage.get()
    if credentials is None:
        # Nothing cached: run the flow. The root logger level is saved and
        # restored around it because the flow somehow overrides it.
        root_logger = logging.getLogger()
        previous_level = root_logger.level
        try:
            credentials = tools.run_flow(flow, storage, args, http=http)
        finally:
            root_logger.setLevel(previous_level)

    # Authorize the transport with the credentials.
    credentials.authorize(http)

    # Refresh the access token if it has expired.
    if credentials.access_token_expired:
        credentials.refresh(http)

    return http, credentials


# gauth.py
#-------------------------------------------------------------------------------


def get_spreadsheets_client(args):
    """Authenticate and build a SpreadsheetsClient carrying an OAuth2 token.

    Args:
      args: An argparse values object, forwarded to get_authenticated_http().
    Returns:
      An instance of SpreadsheetsClient.
    """
    # Authenticate for the spreadsheets feeds scope; only the credentials are
    # needed here, the http object is not used.
    feed_scopes = ['https://spreadsheets.google.com/feeds']
    _, credentials = get_authenticated_http(feed_scopes, args)

    # Build the client and attach a token derived from the credentials.
    client = sheets_client.SpreadsheetsClient()
    client.auth_token = gdata.gauth.OAuth2TokenFromCredentials(credentials)
    return client


def csv_unicode_reader(source, **kwargs):
    """Parse CSV rows from an iterable of unicode lines, yielding unicode cells.

    On Python 2 the csv module cannot read unicode input, so every line is
    temporarily encoded to UTF-8 and every parsed cell is decoded back. On
    Python 3 the csv module reads text directly (and rejects bytes), so the
    lines are passed through unchanged.

    Args:
      source: An iterable sequence of unicode objects.
      **kwargs: Arguments for csv.reader().
    Yields:
      Rows, as lists of unicode objects.
    """
    if str is bytes:
        # Python 2: 'str' is the byte-string type; round-trip through UTF-8.
        encoded_lines = (line.encode('utf-8') for line in source)
        for row in csv.reader(encoded_lines, **kwargs):
            yield [cell.decode('utf-8') for cell in row]
    else:
        # Python 3: csv.reader consumes and produces text natively.
        for row in csv.reader(source, **kwargs):
            yield row


def csv_to_batch_update(filename, batch):
    """Convert a CSV file to a batch update.

    This adds a cell update request for every value in the file, then adds
    empty-string updates for the cells of shorter rows so that the whole
    (row_count x col_count) rectangle is covered. Rows and columns are
    numbered from 1.

    Args:
      filename: A string, the CSV filename to open and parse. The file is read
        as UTF-8, with an optional leading byte-order mark.
      batch: A CellsFeed instance; its add_set_cell(row, col, value) method is
        called for each cell.
    Returns:
      A pair of (row_count, col_count), the size of the CSV batch update /
      table. Both are 0 for an empty file.
    """
    max_rows = 0
    max_cols = 0
    locations = set()

    # Read the file, creating one cell update per value. The 'with' block makes
    # sure the file is closed even if parsing fails.
    with codecs.open(filename, 'r', encoding='utf-8-sig') as csvfile:
        for irow, row in enumerate(csv_unicode_reader(csvfile), 1):
            for icol, value in enumerate(row, 1):
                batch.add_set_cell(irow, icol, value)
                locations.add((irow, icol))
            # Use the row length rather than the inner loop variable, which is
            # undefined (or stale) when the row is empty.
            max_cols = max(max_cols, len(row))
            max_rows = irow

    # Blank out every cell of the rectangle that did not receive a value. The
    # ranges are 1-based to match the locations recorded above.
    for irow in range(1, max_rows + 1):
        for icol in range(1, max_cols + 1):
            if (irow, icol) not in locations:
                batch.add_set_cell(irow, icol, '')

    return (max_rows, max_cols)


def enumerate_arguments(args, use_filenames=True):
    """Parse the filenames and their optional worksheet names or numbers.

    Each argument is either "filename", "filename:worksheet-name" or
    "filename:index"; the split happens at the last colon. An index consists
    only of digits, is 1-based on the command line and is converted to a
    0-based integer. Anything else after the colon is kept as a worksheet name.

    Args:
      args: A list of "filename:worksheet-id" or "filename:index" that specifies
        CSV files to be uploaded and which worksheet id (name) or index to upload
        them to.
      use_filenames: A boolean, if true, arguments without a worksheet use the
        file's basename (without extension) as the sheet name; if false, they
        are assigned the lowest indexes not explicitly requested.
    Returns:
      A pair of (filenames, names): a list of string filenames and a parallel
      list of worksheet names (strings) or 0-based indexes (integers).
    Raises:
      SystemExit: If an index is lower than 1, or if the resulting names or
        indexes are not unique.
    """
    filenames = []
    names = []
    for arg in args:
        match = re.match(r'(.*):(.*)$', arg)
        if match:
            filename, name_or_index = match.groups()
            # Only an all-digits suffix is a worksheet index; something like
            # "2nd" is a sheet name and must not be handed to int().
            if re.match(r'\d+$', name_or_index):
                name_or_index = int(name_or_index) - 1
                if name_or_index < 0:
                    raise SystemExit("Invalid index for '{}'; "
                                     "sheet indexes start at 1.".format(filename))
        else:
            filename = arg
            if use_filenames:
                name_or_index = path.splitext(path.basename(filename))[0]
            else:
                name_or_index = None  # To be allocated below.

        filenames.append(filename)
        names.append(name_or_index)

    # Compute the unused indexes, in ascending order so that allocation does
    # not depend on set iteration order.
    explicit_ids = set(name for name in names if isinstance(name, int))
    available_ids = iter(sorted(set(range(len(args))) - explicit_ids))

    # Allocate an index to every argument that did not get a name or index.
    names = [(next(available_ids) if name is None else name)
             for name in names]

    # Check that the set of names or indexes is unique.
    if len(set(names)) < len(names):
        raise SystemExit('Error: Indexes {} are not unique.'.format(names))

    return filenames, names


def find_or_create_worksheets(gd_client, docid, names):
    """Find or create all the required worksheets.

    An integer is looked up as a 0-based position in the document's list of
    worksheets; a string is looked up with a WorksheetQuery. When the lookup
    comes up short, worksheets are added (with a 1x1 size) and the same lookup
    is repeated.

    Args:
      gd_client: The spreadsheets client; its get_worksheets() and
        add_worksheet() methods are used.
      docid: A string, the document id.
      names: A list of strings or integers, the worksheet names or 0-based
        indexes.
    Returns:
      A list of worksheets corresponding to the input names.
    """
    wslist = []
    for name in names:
        if isinstance(name, int):
            # Positional lookup in the full, unfiltered list of worksheets.
            index = name
            name = "Sheet {:d}".format(index)
            lookup = {}
        else:
            # Lookup by name: take the first result of the query.
            # NOTE(review): confirm whether WorksheetQuery(name) matches titles
            # exactly or more loosely.
            assert isinstance(name, str)
            index = 0
            lookup = {'query': sheets_client.WorksheetQuery(name)}
        worksheets = gd_client.get_worksheets(docid, **lookup)

        try:
            ws = worksheets.entry[index]
        except IndexError:
            # We have to create at least this many sheets.
            # NOTE(review): when several sheets are created to reach an index,
            # they are all given the same title; confirm this is acceptable.
            for _ in range(len(worksheets.entry), index + 1):
                logging.info("Creating worksheet: %s", name)
                gd_client.add_worksheet(docid, name, 1, 1)

            # Try again with the SAME lookup. Fetching the unfiltered list here
            # would make a name lookup return the document's first worksheet
            # instead of the one that was just created.
            worksheets = gd_client.get_worksheets(docid, **lookup)
            ws = worksheets.entry[index]

        wslist.append(ws)

    return wslist


def main():
    """Parse the command line and upload each CSV file to its worksheet.

    For every input file this resolves (or creates) the target worksheet,
    builds a batch of cell updates from the CSV contents, resizes the worksheet
    to the size of the CSV table and sends the batch. Empty CSV files are
    skipped with a warning.
    """
    logging.basicConfig(level=logging.INFO, format='%(levelname)-8s: %(message)s')

    # The module docstring doubles as the command-line description.
    parser = get_argparser(description=__doc__.strip())

    parser.add_argument('docid', action='store',
                        help="Spreadsheets doc id to update")

    parser.add_argument('filenames', nargs='+', action='store',
                        help="CSV filenames[:worksheet] to upload")

    parser.add_argument('--no-use-filenames',
                        dest='use_filenames', action='store_false', default=True,
                        help="Don't use the basenames in creating default sheet names")

    args = parser.parse_args()
    docid = args.docid

    # Connect and create a client.
    gd_client = get_spreadsheets_client(args)

    logging.info("Resolving indexes for each of the input files.")
    filenames, names = enumerate_arguments(args.filenames, args.use_filenames)

    logging.info("Find or create the worksheets.")
    wslist = find_or_create_worksheets(gd_client, docid, names)

    logging.info("Processing each worksheet.")
    for ws, filename in zip(wslist, filenames):
        title = ws.title.text

        logging.info("[%-32.32s] Loading CSV data and creating batch update.", title)
        batch = sheets_data.build_batch_cells_update(docid, ws.get_worksheet_id())
        nrows, ncols = csv_to_batch_update(filename, batch)

        if nrows == 0:
            # logging.warning() replaces the deprecated logging.warn() alias.
            logging.warning("[%-32.32s] No data, empty file. Skipping.", title)
            continue

        # Set the worksheet dimensions to those of the CSV table before
        # sending the cell updates.
        logging.info("[%-32.32s] Resizing and clearing.", title)
        ws.row_count.text = str(nrows)
        ws.col_count.text = str(ncols)
        gd_client.update(ws)

        logging.info("[%-32.32s] Uploading data to spreadsheet.", title)
        gd_client.batch(batch, force=True)

    logging.info("Done.")


# Run the uploader only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()
