]> jfr.im git - irc/rizon/acid.git/blob - pyva/populate-zipcodes.py
Merge branch 'edupe_join/part' of https://gitlab.com/rizon/acid into edupe_join/part
[irc/rizon/acid.git] / pyva / populate-zipcodes.py
1 #!/usr/bin/env python2
2 # PyPsd's Internets bot's zip code importer, for the weather commands
3 # This is run separately to PyPsd itself, before if you want weather ZIP codes to work
4
5 import os
6 import sys
7 import codecs
8 import ConfigParser
9 import MySQLdb as db
10
11 # find config file and zipcode csv file
12 config_filename = 'config.ini'
13 if not os.path.exists(config_filename):
14 config_filename = os.path.join('..', config_filename)
15 if not os.path.exists(config_filename):
16 print "We cannot locate the pypsd/pyva config.ini file"
17 print 'This script should be run from the root acid/ directory'
18 exit(1)
19
20 zipcode_csv_file = 'weather-zipcodes.csv'
21 if not os.path.exists(zipcode_csv_file):
22 zipcode_csv_file = os.path.join('data', 'weather-zipcodes.csv')
23 if not os.path.exists(zipcode_csv_file):
24 zipcode_csv_file = os.path.join('pyva', zipcode_csv_file)
25 if not os.path.exists(zipcode_csv_file):
26 print "We cannot locate the zipcode csv file"
27 print 'weather-zipcodes.csv should be in the working dir, in data/ , or in pyva/data/'
28 exit(1)
29
30 # options
31 drop_table = '--drop' in sys.argv
32 debug = '--debug' in sys.argv
33
34 # config
35 config = ConfigParser.ConfigParser()
36 config.readfp(codecs.open(config_filename, 'r', 'utf8'))
37
38 # make db pointer
39 dbx = db.connect(
40 host=config.get('database', 'host'),
41 user=config.get('database', 'user'),
42 passwd=config.get('database', 'passwd'),
43 db=config.get('database', 'db'),
44 )
45 dbx.autocommit(True) # no need to have transactions
46 dbp = dbx.cursor()
47
48 # check if table exists
49 dbp.execute("SELECT count(*) FROM information_schema.tables t WHERE t.table_name='zipcode_citystate';")
50 if dbp.fetchone()[0] == 0:
51 table_exists = False
52 else:
53 table_exists = True
54 rows = 0 # zero rows since table doesn't exist
55
56 # clearing/setting table information
57 try:
58 # check if we should drop the table, to basically 'refresh'and start again
59 if drop_table:
60 if table_exists:
61 dbp.execute("DROP TABLE zipcode_citystate;")
62 print 'Current ZIP table dropped.'
63 table_exists = False
64 exit()
65 else:
66 print 'ZIP table does not exist, cannot drop.'
67 exit()
68
69 if table_exists:
70 # count current rows, see if we have any info in there already
71 dbp.execute("SELECT count(*) FROM zipcode_citystate;")
72 rows = int(dbp.fetchone()[0])
73 if rows > 0:
74 print 'ZIP codes already exist in database. Please run:'
75 print sys.argv[0], '--drop'
76 print 'in order to clear the current ZIP table and populate with new ZIP codes'
77 exit(1)
78 else:
79 print 'ZIP Table does not yet exist.'
80 except Exception, err:
81 print 'Error setting up initial table information: {}'.format(err)
82 raise
83
84 # create table
85 try:
86 dbp.execute("CREATE TABLE IF NOT EXISTS zipcode_citystate (zipcode INT(5), city VARCHAR(15) NOT NULL, state VARCHAR(2) NOT NULL, UNIQUE KEY (zipcode));")
87 print 'ZIP Table created.'
88 except Exception, err:
89 print 'Error creating database table zipcode_citystate: {}'.format(err)
90 raise
91
92 # load zipfile contents
93 if rows == 0:
94 with open(zipcode_csv_file, 'rb') as zipfile:
95 print 'Populating ZIP Table'
96
97 current_row = 0
98 first_line = True # contains row info, not necessary and breaks our int/float/etc
99
100 for line in zipfile:
101 if first_line:
102 first_line = False
103 continue
104
105 # insertion
106 line = line.replace('\n', '').replace('"', '').split(',')
107 if len(line) > 3:
108 zipcode, city, state, latitude, longitude, timezone, dst = line
109
110 try:
111 dbp.execute("INSERT INTO zipcode_citystate (zipcode, city, state) VALUES (%s,%s,%s);", [int(zipcode), city, state])
112 if debug:
113 print 'Inserted ZIP code', zipcode, ':', city, ':', state
114 except Exception, err:
115 print 'Error adding ZIP code to database: ({})'.format(err)
116 print line
117 print zipcode, city, state
118 raise
119
120 # status
121 current_row += 1
122 if current_row % 1000 == 0: # print every 1k rows
123 print current_row, 'rows inserted'
124
125 print current_row, 'rows inserted in total'