]> jfr.im git - irc/rizon/acid.git/blob - pyva/populate-zipcodes.py
Add populate-zipcodes script and required data files, for Internets .w
[irc/rizon/acid.git] / pyva / populate-zipcodes.py
1 #!/usr/bin/env python2
2 # PyPsd's Internets bot's zip code importer, for the weather commands
3 # This is run separately to PyPsd itself, before if you want weather ZIP codes to work
4
5 import os
6 import sys
7 import codecs
8 import ConfigParser
9 import MySQLdb as db
10
11 # find config file and zipcode csv file
12 config_filename = 'config.ini'
13 if not os.path.exists(config_filename):
14 config_filename = os.path.join('..', config_filename)
15 if not os.path.exists(config_filename):
16 print "We cannot locate the pypsd/pyva config.ini file"
17 print 'This script should be run from the root acid/ directory'
18 exit(1)
19
20 zipcode_csv_file = 'weather-zipcodes.csv'
21 if not os.path.exists(zipcode_csv_file):
22 zipcode_csv_file = os.path.join('data', 'weather-zipcodes.csv')
23 if not os.path.exists(zipcode_csv_file):
24 zipcode_csv_file = os.path.join('pyva', zipcode_csv_file)
25 if not os.path.exists(zipcode_csv_file):
26 print "We cannot locate the zipcode csv file"
27 print 'weather-zipcodes.csv should be in the working dir, in data/ , or in pyva/data/'
28 exit(1)
29
30 # options
31 drop_table = '--drop' in sys.argv
32 debug = '--debug' in sys.argv
33
34 # config
35 config = ConfigParser.ConfigParser()
36 config.readfp(codecs.open(config_filename, 'r', 'utf8'))
37
38 # make db pointer
39 dbx = db.connect(
40 host=config.get('database', 'host'),
41 user=config.get('database', 'user'),
42 passwd=config.get('database', 'passwd'),
43 db=config.get('database', 'db'),
44 unix_socket=config.get('database', 'sock')
45 )
46 dbx.autocommit(True) # no need to have transactions
47 dbp = dbx.cursor()
48
49 # check if table exists
50 dbp.execute("SELECT count(*) FROM information_schema.tables t WHERE t.table_name='zipcode_citystate';")
51 if dbp.fetchone()[0] == 0:
52 table_exists = False
53 else:
54 table_exists = True
55 rows = 0 # zero rows since table doesn't exist
56
57 # clearing/setting table information
58 try:
59 # check if we should drop the table, to basically 'refresh'and start again
60 if drop_table:
61 if table_exists:
62 dbp.execute("DROP TABLE zipcode_citystate;")
63 print 'Current ZIP table dropped.'
64 table_exists = False
65 exit()
66 else:
67 print 'ZIP table does not exist, cannot drop.'
68 exit()
69
70 if table_exists:
71 # count current rows, see if we have any info in there already
72 dbp.execute("SELECT count(*) FROM zipcode_citystate;")
73 rows = int(dbp.fetchone()[0])
74 if rows > 0:
75 print 'ZIP codes already exist in database. Please run:'
76 print sys.argv[0], '--drop'
77 print 'in order to clear the current ZIP table and populate with new ZIP codes'
78 exit(1)
79 else:
80 print 'ZIP Table does not yet exist.'
81 except Exception, err:
82 print 'Error setting up initial table information: {}'.format(err)
83 raise
84
85 # create table
86 try:
87 dbp.execute("CREATE TABLE IF NOT EXISTS zipcode_citystate (zipcode INT(5), city VARCHAR(15) NOT NULL, state VARCHAR(2) NOT NULL, UNIQUE KEY (zipcode));")
88 print 'ZIP Table created.'
89 except Exception, err:
90 print 'Error creating database table zipcode_citystate: {}'.format(err)
91 raise
92
93 # load zipfile contents
94 if rows == 0:
95 with open(zipcode_csv_file, 'rb') as zipfile:
96 print 'Populating ZIP Table'
97
98 current_row = 0
99 first_line = True # contains row info, not necessary and breaks our int/float/etc
100
101 for line in zipfile:
102 if first_line:
103 first_line = False
104 continue
105
106 # insertion
107 line = line.replace('\n', '').replace('"', '').split(',')
108 if len(line) > 3:
109 zipcode, city, state, latitude, longitude, timezone, dst = line
110
111 try:
112 dbp.execute("INSERT INTO zipcode_citystate (zipcode, city, state) VALUES (%s,%s,%s);", [int(zipcode), city, state])
113 if debug:
114 print 'Inserted ZIP code', zipcode, ':', city, ':', state
115 except Exception, err:
116 print 'Error adding ZIP code to database: ({})'.format(err)
117 print line
118 print zipcode, city, state
119 raise
120
121 # status
122 current_row += 1
123 if current_row % 1000 == 0: # print every 1k rows
124 print current_row, 'rows inserted'
125
126 print current_row, 'rows inserted in total'