bin/inventory-import.py
changeset 59 de6abcbd3c03
equal deleted inserted replaced
58:4f4150296cd3 59:de6abcbd3c03
       
     1 #!/usr/bin/env python
       
     2 
       
     3 """
       
     4     Import inventory table from .csv
       
     5 """
       
     6 
       
     7 from svv import application
       
     8 from svv import database as db
       
     9 from svv.items import Item
       
    10 
       
    11 import csv
       
    12 
       
    13 import optparse, logging
       
    14 
       
    15 log = logging.getLogger('inventory-import')
       
    16 
       
    17 def load_csv (path, encoding='utf-8') :
       
    18     """
       
    19         Load CSV data from given fs path, yielding the data as unicode strings.
       
    20     """
       
    21 
       
    22     reader = csv.reader(open(path, 'r'))
       
    23 
       
    24     for row in reader :
       
    25         # decode
       
    26         yield [cell.decode(encoding) for cell in row]
       
    27 
       
    28 def parse_csv (csv,
       
    29     header_rows=3,
       
    30     header_columns=[u"Sijainti", u"Artikkeli", u"Lkm", u"Selite"],
       
    31     column_offset=0,
       
    32     column_count=4,
       
    33     normalize_qty=True,
       
    34 ) :
       
    35     """
       
    36         Parse item data from given csv:
       
    37             
       
    38             csv             - the csv.Reader object
       
    39 
       
    40             header_rows     - number of column header row
       
    41             header_columns  - set of column headers to detect
       
    42             column_offset   - number of columns to skip
       
    43             column_count    - number of columns to use
       
    44             normalize_qty   - items with qty=1 aren't countable
       
    45         
       
    46         Yields a series of
       
    47 
       
    48             (row_id, parent_row, name, qty, descr)
       
    49 
       
    50         tuples.
       
    51     """
       
    52 
       
    53     # column def
       
    54     column_def = ['location', 'name', 'qty', 'descr']
       
    55 
       
    56     # row_id's for loaded items
       
    57     row_by_name = {}
       
    58 
       
    59 
       
    60     for row, columns in enumerate(csv) :
       
    61         # fix up row number
       
    62         row += 1
       
    63 
       
    64         # fix up columns
       
    65         columns = columns[column_offset:column_offset + column_count]
       
    66 
       
    67         # header
       
    68         if row < header_rows :
       
    69             # skip
       
    70             continue
       
    71 
       
    72         elif row == header_rows :
       
    73             # check header
       
    74             if columns != header_columns :
       
    75                 # fail
       
    76                 raise Exception("Header mismatch, excepting %r, got %r on row %r" % (header_columns, columns, row))
       
    77 
       
    78             # ok
       
    79             continue
       
    80 
       
    81         elif all(not col.strip() for col in columns) :
       
    82             # empty row
       
    83             continue
       
    84         
       
    85         elif len(columns) != len(column_def) :
       
    86             # bad column count
       
    87             raise Exception("Column count mismatch, expecting %r, got %r on row %r" % (column_def, columns, row))
       
    88         
       
    89         # load column
       
    90         parent, name, qty, descr = columns
       
    91         
       
    92         # normalize
       
    93         parent = parent.strip()
       
    94         name = name.strip()
       
    95         qty = qty.strip()
       
    96         descr = descr.strip()
       
    97         
       
    98         # warn on whitespace
       
    99         if [parent, name, qty, descr] != columns :
       
   100             log.warning("Extra whitespace on row %d", row)
       
   101 
       
   102         # convert
       
   103         try :
       
   104             qty = int(qty) if qty else None
       
   105 
       
   106         except ValueError :
       
   107             raise Exception("Invalid item quantity, got %r on row %r" % (qty, row))
       
   108 
       
   109         # normalize quantity
       
   110         if qty == 0 :
       
   111             # skip
       
   112             log.warning("Skipping qty=0 item %r on row %d", name, row)
       
   113             
       
   114         elif qty == 1 and normalize_qty :
       
   115             # one item is a single item
       
   116             qty = None
       
   117 
       
   118         # map
       
   119         if not parent :
       
   120             parent = None
       
   121 
       
   122         elif parent not in row_by_name :
       
   123             raise Exception("Invalid location, got %r on row %r" % (parent, row))
       
   124 
       
   125         else :
       
   126             # lookup row_id
       
   127             parent = row_by_name[parent]
       
   128         
       
   129         # debug
       
   130         log.debug("Load %d: %s %s x%s -> %d", row, name, descr, qty, parent)
       
   131         
       
   132         # store
       
   133         row_by_name[name] = row
       
   134 
       
   135         # yield
       
   136         yield row, parent, name, qty, descr
       
   137 
       
   138 def create_items (session, items) :
       
   139     """
       
   140         Create the given items and add them to the session
       
   141     """
       
   142 
       
   143     # lookup parents
       
   144     items_by_row = {}
       
   145 
       
   146     for row, parent, name, qty, descr in items :
       
   147         # look up parent
       
   148         if parent :
       
   149             parent = items_by_row[parent]
       
   150 
       
   151         # build
       
   152         item = Item(name, descr, qty, parent)
       
   153 
       
   154         # store
       
   155         items_by_row[row] = item
       
   156 
       
   157         # add
       
   158         session.add(item)
       
   159 
       
   160     # number of items added
       
   161     return len(items_by_row)
       
   162 
       
   163 def main () :
       
   164     parser = optparse.OptionParser(usage="Usage: %prog [options] CSV")
       
   165 
       
   166     parser.add_option('-q', '--quiet', action='store_true', help='More output')
       
   167     parser.add_option('-v', '--verbose', action='store_true', help='More output')
       
   168     parser.add_option('-D', '--debug', action='store_true', help="Even more output (SQL queries)")
       
   169 
       
   170     parser.add_option('-d', '--database', help="Database connection URI", metavar='URL')
       
   171     parser.add_option(      '--init-database', action='store_true', help="Initialize database (CREATE)")
       
   172 
       
   173     parser.add_option('-C', '--charset', help="CSV charset", metavar='CHARSET', default='utf-8')
       
   174 
       
   175     parser.add_option('--dry-run', action='store_true', help="Execute INSERTs but don't actually commit()")
       
   176 
       
   177     (options, args) = parser.parse_args()
       
   178 
       
   179     # CSV from args
       
   180     csv_path, = args
       
   181     csv_encoding = options.charset
       
   182     
       
   183     # defaults
       
   184     level = logging.INFO
       
   185     sql_echo = False
       
   186 
       
   187     if options.quiet :
       
   188         level = logging.WARN
       
   189 
       
   190     if options.verbose :
       
   191         level = logging.DEBUG
       
   192     
       
   193     if options.debug :
       
   194         level = logging.DEBUG
       
   195         sql_echo = True
       
   196 
       
   197     logging.basicConfig(format="[%(levelname)5s] %(funcName)25s : %(message)s", level=level)
       
   198 
       
   199     if sql_echo :
       
   200         # echo SQL queries
       
   201         logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO)
       
   202 
       
   203     # app state
       
   204     app = application.Application(
       
   205             options.database,
       
   206     )
       
   207 
       
   208     # init?
       
   209     if options.init_database :
       
   210         app.create_tables()
       
   211 
       
   212     # load CSV data
       
   213     csv = load_csv(csv_path, encoding=csv_encoding)
       
   214 
       
   215     # parse info
       
   216     item_data = list(parse_csv(csv))
       
   217     
       
   218     # db session
       
   219     session = app.session()
       
   220 
       
   221     # inserts
       
   222     count = create_items(session, item_data)
       
   223     
       
   224     if options.dry_run :
       
   225         log.info("Rolling back %d INSERTs for --dry-run...", count)
       
   226 
       
   227         # revert
       
   228         session.rollback()
       
   229 
       
   230     else :
       
   231         log.info("Committing %d new items..", count)
       
   232 
       
   233         # ok, commit
       
   234         session.commit()
       
   235 
       
   236 if __name__ == '__main__' :
       
   237     main()
       
   238