bin/inventory-import.py
changeset 59 de6abcbd3c03
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bin/inventory-import.py	Thu Jan 20 23:21:14 2011 +0200
@@ -0,0 +1,238 @@
+#!/usr/bin/env python
+
+"""
+    Import inventory table from .csv
+"""
+
+from svv import application
+from svv import database as db
+from svv.items import Item
+
+import csv
+
+import optparse, logging
+
+log = logging.getLogger('inventory-import')
+
+def load_csv (path, encoding='utf-8') :
+    """
+        Load CSV data from given fs path, yielding the data as unicode strings.
+    """
+
+    reader = csv.reader(open(path, 'r'))
+
+    for row in reader :
+        # decode
+        yield [cell.decode(encoding) for cell in row]
+
+def parse_csv (csv,
+    header_rows=3,
+    header_columns=[u"Sijainti", u"Artikkeli", u"Lkm", u"Selite"],
+    column_offset=0,
+    column_count=4,
+    normalize_qty=True,
+) :
+    """
+        Parse item data from given csv:
+            
+            csv             - the csv.Reader object
+
+            header_rows     - number of column header row
+            header_columns  - set of column headers to detect
+            column_offset   - number of columns to skip
+            column_count    - number of columns to use
+            normalize_qty   - items with qty=1 aren't countable
+        
+        Yields a series of
+
+            (row_id, parent_row, name, qty, descr)
+
+        tuples.
+    """
+
+    # column def
+    column_def = ['location', 'name', 'qty', 'descr']
+
+    # row_id's for loaded items
+    row_by_name = {}
+
+
+    for row, columns in enumerate(csv) :
+        # fix up row number
+        row += 1
+
+        # fix up columns
+        columns = columns[column_offset:column_offset + column_count]
+
+        # header
+        if row < header_rows :
+            # skip
+            continue
+
+        elif row == header_rows :
+            # check header
+            if columns != header_columns :
+                # fail
+                raise Exception("Header mismatch, excepting %r, got %r on row %r" % (header_columns, columns, row))
+
+            # ok
+            continue
+
+        elif all(not col.strip() for col in columns) :
+            # empty row
+            continue
+        
+        elif len(columns) != len(column_def) :
+            # bad column count
+            raise Exception("Column count mismatch, expecting %r, got %r on row %r" % (column_def, columns, row))
+        
+        # load column
+        parent, name, qty, descr = columns
+        
+        # normalize
+        parent = parent.strip()
+        name = name.strip()
+        qty = qty.strip()
+        descr = descr.strip()
+        
+        # warn on whitespace
+        if [parent, name, qty, descr] != columns :
+            log.warning("Extra whitespace on row %d", row)
+
+        # convert
+        try :
+            qty = int(qty) if qty else None
+
+        except ValueError :
+            raise Exception("Invalid item quantity, got %r on row %r" % (qty, row))
+
+        # normalize quantity
+        if qty == 0 :
+            # skip
+            log.warning("Skipping qty=0 item %r on row %d", name, row)
+            
+        elif qty == 1 and normalize_qty :
+            # one item is a single item
+            qty = None
+
+        # map
+        if not parent :
+            parent = None
+
+        elif parent not in row_by_name :
+            raise Exception("Invalid location, got %r on row %r" % (parent, row))
+
+        else :
+            # lookup row_id
+            parent = row_by_name[parent]
+        
+        # debug
+        log.debug("Load %d: %s %s x%s -> %d", row, name, descr, qty, parent)
+        
+        # store
+        row_by_name[name] = row
+
+        # yield
+        yield row, parent, name, qty, descr
+
+def create_items (session, items) :
+    """
+        Create the given items and add them to the session
+    """
+
+    # lookup parents
+    items_by_row = {}
+
+    for row, parent, name, qty, descr in items :
+        # look up parent
+        if parent :
+            parent = items_by_row[parent]
+
+        # build
+        item = Item(name, descr, qty, parent)
+
+        # store
+        items_by_row[row] = item
+
+        # add
+        session.add(item)
+
+    # number of items added
+    return len(items_by_row)
+
+def main () :
+    parser = optparse.OptionParser(usage="Usage: %prog [options] CSV")
+
+    parser.add_option('-q', '--quiet', action='store_true', help='More output')
+    parser.add_option('-v', '--verbose', action='store_true', help='More output')
+    parser.add_option('-D', '--debug', action='store_true', help="Even more output (SQL queries)")
+
+    parser.add_option('-d', '--database', help="Database connection URI", metavar='URL')
+    parser.add_option(      '--init-database', action='store_true', help="Initialize database (CREATE)")
+
+    parser.add_option('-C', '--charset', help="CSV charset", metavar='CHARSET', default='utf-8')
+
+    parser.add_option('--dry-run', action='store_true', help="Execute INSERTs but don't actually commit()")
+
+    (options, args) = parser.parse_args()
+
+    # CSV from args
+    csv_path, = args
+    csv_encoding = options.charset
+    
+    # defaults
+    level = logging.INFO
+    sql_echo = False
+
+    if options.quiet :
+        level = logging.WARN
+
+    if options.verbose :
+        level = logging.DEBUG
+    
+    if options.debug :
+        level = logging.DEBUG
+        sql_echo = True
+
+    logging.basicConfig(format="[%(levelname)5s] %(funcName)25s : %(message)s", level=level)
+
+    if sql_echo :
+        # echo SQL queries
+        logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO)
+
+    # app state
+    app = application.Application(
+            options.database,
+    )
+
+    # init?
+    if options.init_database :
+        app.create_tables()
+
+    # load CSV data
+    csv = load_csv(csv_path, encoding=csv_encoding)
+
+    # parse info
+    item_data = list(parse_csv(csv))
+    
+    # db session
+    session = app.session()
+
+    # inserts
+    count = create_items(session, item_data)
+    
+    if options.dry_run :
+        log.info("Rolling back %d INSERTs for --dry-run...", count)
+
+        # revert
+        session.rollback()
+
+    else :
+        log.info("Committing %d new items..", count)
+
+        # ok, commit
+        session.commit()
+
+if __name__ == '__main__' :
+    main()
+