Mercurial > hg > Lemuriformes
comparison lemuriformes/csv2sqlite.py @ 11:afc259799019
[CSV] add script to convert to SQL
| author | Jeff Hammel <k0scist@gmail.com> |
|---|---|
| date | Sun, 10 Dec 2017 13:58:55 -0800 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 10:ad1bf59eedb0 | 11:afc259799019 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 """ | |
| 4 convert CSV files to a SQLite DB file | |
| 5 """ | |
| 6 | |
| 7 # For SQLite types see | |
| 8 # http://www.sqlite.org/datatype3.html | |
| 9 | |
| 10 import argparse | |
| 11 import csv | |
| 12 import os | |
| 13 import sqlite3 | |
| 14 import sys | |
| 15 from collections import OrderedDict | |
| 16 from .cast import string | |
| 17 from .sql import SQLConnection | |
| 18 | |
| 19 | |
| 20 def read_csv(csv_file): | |
| 21 """read csv file with header and return list of dicts""" | |
| 22 | |
| 23 reader = csv.DictReader(csv_file) | |
| 24 return [row for row in reader] | |
| 25 | |
| 26 | |
| 27 def path_root(path): | |
| 28 """return basename file root sans extension""" | |
| 29 | |
| 30 return os.path.splitext(os.path.basename(path))[0] | |
| 31 | |
| 32 | |
| 33 class SQLiteConnection(SQLConnection): | |
| 34 """connection class to SQLite database""" | |
| 35 | |
| 36 # mapping of python types to SQLite types | |
| 37 types = {int: "INTEGER", | |
| 38 float: "REAL", | |
| 39 str: "TEXT"} | |
| 40 | |
| 41 def __init__(self, db_file): | |
| 42 if not os.path.exists(db_file): | |
| 43 with open(db_file, 'wb') as f: | |
| 44 # touch file | |
| 45 pass | |
| 46 self.db_file = db_file | |
| 47 self.conn = sqlite3.connect(self.db_file) | |
| 48 | |
| 49 def __call__(self, sql, *args): | |
| 50 c = self.conn.cursor() | |
| 51 c.execute(sql, args) | |
| 52 retval = c.fetchall() | |
| 53 self.conn.commit() | |
| 54 return retval | |
| 55 | |
| 56 def __del__(self): | |
| 57 self.conn.close() | |
| 58 | |
| 59 def tables(self): | |
| 60 """ | |
| 61 return tables in the SQLite database | |
| 62 Ref: https://stackoverflow.com/questions/82875/how-to-list-the-tables-in-an-sqlite-database-file-that-was-opened-with-attach | |
| 63 """ | |
| 64 sql = "SELECT name FROM sqlite_master WHERE type='table';" | |
| 65 results = self(sql) | |
| 66 return [row[0] for row in results] | |
| 67 | |
| 68 def create_table(self, table, **columns): | |
| 69 """add a table to the database""" | |
| 70 | |
| 71 columns = ', '.join(["{key} {value}".format(key=key, value=value) | |
| 72 for key, value in columns.items()]) | |
| 73 self("CREATE TABLE IF NOT EXISTS {table} ({columns});".format(table=table, | |
| 74 columns=columns)) | |
| 75 | |
| 76 def schema(self, table): | |
| 77 """get dictionary of typing for a table""" | |
| 78 # http://www.sqlite.org/pragma.html#pragma_table_info | |
| 79 | |
| 80 sql = "PRAGMA table_info('{}')".format(table) | |
| 81 results = self(sql) | |
| 82 # TODO: get data types from e.g. | |
| 83 # [(0, u'how', u'', 0, None, 0), (1, u'are', u'', 0, None, 0), (2, u'you', u'', 0, None, 0)] | |
| 84 # ... now | |
| 85 # [(0, u'how', u'TEXT', 0, None, 0), (1, u'you', u'TEXT', 0, None, 0), (2, u'are', u'TEXT', 0, None, 0)] | |
| 86 return OrderedDict([(result[1], result[2]) | |
| 87 for result in results]) | |
| 88 | |
| 89 def columns(self, table): | |
| 90 """return ordered list of column names""" | |
| 91 | |
| 92 return self.schema(table).keys() | |
| 93 | |
| 94 def select(self, table): | |
| 95 """just `select *` for now""" | |
| 96 | |
| 97 sql = "SELECT * FROM {table};" | |
| 98 sql = sql.format(table=table) | |
| 99 data = self(sql) | |
| 100 columns = self.columns(table) | |
| 101 return [OrderedDict(zip(columns, row)) | |
| 102 for row in data] | |
| 103 | |
| 104 select_all = select | |
| 105 | |
| 106 | |
| 107 class CSV2SQLite(object): | |
| 108 """ | |
| 109 convert Comma Separated Value input to SQLite | |
| 110 """ | |
| 111 # TODO: allow conversion to arbitrary SQL | |
| 112 | |
| 113 def __init__(self, output): | |
| 114 self.conn = SQLiteConnection(output) | |
| 115 | |
| 116 def __call__(self, *csv_files, **csv_dict): | |
| 117 | |
| 118 # allow tables of default(ing) name | |
| 119 csv_dict.update({csv_file: None | |
| 120 for csv_file in csv_files}) | |
| 121 | |
| 122 for csv_file, tablename in csv_dict.items(): | |
| 123 self.csv2table(csv_file, tablename=tablename) | |
| 124 | |
| 125 def csv2table(self, csv_file, tablename=None): | |
| 126 if isinstance(csv_file, string): | |
| 127 with open(csv_file) as f: | |
| 128 return self.csv2table(f, tablename=tablename) | |
| 129 if tablename is None: | |
| 130 # TODO: allow lookup from scheme | |
| 131 tablename = path_root(csv_file.name) | |
| 132 # read csv | |
| 133 data = read_csv(csv_file) | |
| 134 assert data | |
| 135 | |
| 136 # infer schema from data | |
| 137 # TODO: allow lookup from scheme | |
| 138 columns = {column: "TEXT" for column in data[0].keys()} | |
| 139 | |
| 140 # create table | |
| 141 self.conn.create_table(tablename, **columns) | |
| 142 | |
| 143 # inseert data | |
| 144 for row in data: | |
| 145 self.conn.insert_row(tablename, **row) | |
| 146 | |
| 147 | |
| 148 def main(args=sys.argv[1:]): | |
| 149 """CLI""" | |
| 150 | |
| 151 # parse command line | |
| 152 parser = argparse.ArgumentParser(description=__doc__) | |
| 153 parser.add_argument('input', nargs='+', | |
| 154 type=argparse.FileType('r'), | |
| 155 help="input CSV files; table names taken from file names") | |
| 156 parser.add_argument('-o', '--output', dest='output', | |
| 157 required=True, | |
| 158 help="output SQLite file") | |
| 159 options = parser.parse_args(args) | |
| 160 | |
| 161 # overwrite the file | |
| 162 # TODO: deprecate and allow appending | |
| 163 with open(options.output, 'w') as f: | |
| 164 pass | |
| 165 | |
| 166 # instantiate converter | |
| 167 conn = CSV2SQLite(options.output) | |
| 168 | |
| 169 # convert input CSV to SQLite tables | |
| 170 conn(*options.input) | |
| 171 | |
| 172 | |
| 173 if __name__ == '__main__': | |
| 174 main() |
