bin/inventory-import.py
author Tero Marttila <terom@fixme.fi>
Thu, 20 Jan 2011 23:21:14 +0200
changeset 59 de6abcbd3c03
permissions -rwxr-xr-x
items: add inventory-import script, and tweak listing
#!/usr/bin/env python

"""
    Import inventory table from .csv
"""

from svv import application
from svv import database as db
from svv.items import Item

import csv

import optparse, logging

log = logging.getLogger('inventory-import')

def load_csv (path, encoding='utf-8') :
    """
        Load CSV data from given fs path, yielding the data as unicode strings.
    """

    reader = csv.reader(open(path, 'r'))

    for row in reader :
        # decode
        yield [cell.decode(encoding) for cell in row]

def parse_csv (csv,
    header_rows=3,
    header_columns=[u"Sijainti", u"Artikkeli", u"Lkm", u"Selite"],
    column_offset=0,
    column_count=4,
    normalize_qty=True,
) :
    """
        Parse item data from given csv:
            
            csv             - the csv.Reader object

            header_rows     - number of column header row
            header_columns  - set of column headers to detect
            column_offset   - number of columns to skip
            column_count    - number of columns to use
            normalize_qty   - items with qty=1 aren't countable
        
        Yields a series of

            (row_id, parent_row, name, qty, descr)

        tuples.
    """

    # column def
    column_def = ['location', 'name', 'qty', 'descr']

    # row_id's for loaded items
    row_by_name = {}


    for row, columns in enumerate(csv) :
        # fix up row number
        row += 1

        # fix up columns
        columns = columns[column_offset:column_offset + column_count]

        # header
        if row < header_rows :
            # skip
            continue

        elif row == header_rows :
            # check header
            if columns != header_columns :
                # fail
                raise Exception("Header mismatch, excepting %r, got %r on row %r" % (header_columns, columns, row))

            # ok
            continue

        elif all(not col.strip() for col in columns) :
            # empty row
            continue
        
        elif len(columns) != len(column_def) :
            # bad column count
            raise Exception("Column count mismatch, expecting %r, got %r on row %r" % (column_def, columns, row))
        
        # load column
        parent, name, qty, descr = columns
        
        # normalize
        parent = parent.strip()
        name = name.strip()
        qty = qty.strip()
        descr = descr.strip()
        
        # warn on whitespace
        if [parent, name, qty, descr] != columns :
            log.warning("Extra whitespace on row %d", row)

        # convert
        try :
            qty = int(qty) if qty else None

        except ValueError :
            raise Exception("Invalid item quantity, got %r on row %r" % (qty, row))

        # normalize quantity
        if qty == 0 :
            # skip
            log.warning("Skipping qty=0 item %r on row %d", name, row)
            
        elif qty == 1 and normalize_qty :
            # one item is a single item
            qty = None

        # map
        if not parent :
            parent = None

        elif parent not in row_by_name :
            raise Exception("Invalid location, got %r on row %r" % (parent, row))

        else :
            # lookup row_id
            parent = row_by_name[parent]
        
        # debug
        log.debug("Load %d: %s %s x%s -> %d", row, name, descr, qty, parent)
        
        # store
        row_by_name[name] = row

        # yield
        yield row, parent, name, qty, descr

def create_items (session, items) :
    """
        Create the given items and add them to the session
    """

    # lookup parents
    items_by_row = {}

    for row, parent, name, qty, descr in items :
        # look up parent
        if parent :
            parent = items_by_row[parent]

        # build
        item = Item(name, descr, qty, parent)

        # store
        items_by_row[row] = item

        # add
        session.add(item)

    # number of items added
    return len(items_by_row)

def main () :
    parser = optparse.OptionParser(usage="Usage: %prog [options] CSV")

    parser.add_option('-q', '--quiet', action='store_true', help='More output')
    parser.add_option('-v', '--verbose', action='store_true', help='More output')
    parser.add_option('-D', '--debug', action='store_true', help="Even more output (SQL queries)")

    parser.add_option('-d', '--database', help="Database connection URI", metavar='URL')
    parser.add_option(      '--init-database', action='store_true', help="Initialize database (CREATE)")

    parser.add_option('-C', '--charset', help="CSV charset", metavar='CHARSET', default='utf-8')

    parser.add_option('--dry-run', action='store_true', help="Execute INSERTs but don't actually commit()")

    (options, args) = parser.parse_args()

    # CSV from args
    csv_path, = args
    csv_encoding = options.charset
    
    # defaults
    level = logging.INFO
    sql_echo = False

    if options.quiet :
        level = logging.WARN

    if options.verbose :
        level = logging.DEBUG
    
    if options.debug :
        level = logging.DEBUG
        sql_echo = True

    logging.basicConfig(format="[%(levelname)5s] %(funcName)25s : %(message)s", level=level)

    if sql_echo :
        # echo SQL queries
        logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO)

    # app state
    app = application.Application(
            options.database,
    )

    # init?
    if options.init_database :
        app.create_tables()

    # load CSV data
    csv = load_csv(csv_path, encoding=csv_encoding)

    # parse info
    item_data = list(parse_csv(csv))
    
    # db session
    session = app.session()

    # inserts
    count = create_items(session, item_data)
    
    if options.dry_run :
        log.info("Rolling back %d INSERTs for --dry-run...", count)

        # revert
        session.rollback()

    else :
        log.info("Committing %d new items..", count)

        # ok, commit
        session.commit()

if __name__ == '__main__' :
    main()