]> jfr.im git - irc/rizon/acid.git/blob - pyva/populate-zipcodes.py
Merge branch 'quotes/search+string' into 'master'
[irc/rizon/acid.git] / pyva / populate-zipcodes.py
1 #!/usr/bin/env python2
2 # PyPsd's Internets bot's zip code importer, for the weather commands
3 # This is run separately to PyPsd itself, before if you want weather ZIP codes to work
4
5 import os
6 import sys
7 import yaml
8 import MySQLdb as db
9
10 # find config file and zipcode csv file
11 config_filename = 'pypsd.yml'
12 if not os.path.exists(config_filename):
13 config_filename = os.path.join('..', config_filename)
14 if not os.path.exists(config_filename):
15 print "We cannot locate the configuration file pypsd.yml"
16 print 'This script should be run from the root acid/ directory'
17 exit(1)
18
19 zipcode_csv_file = 'weather-zipcodes.csv'
20 if not os.path.exists(zipcode_csv_file):
21 zipcode_csv_file = os.path.join('data', 'weather-zipcodes.csv')
22 if not os.path.exists(zipcode_csv_file):
23 zipcode_csv_file = os.path.join('pyva', zipcode_csv_file)
24 if not os.path.exists(zipcode_csv_file):
25 print "We cannot locate the zipcode csv file"
26 print 'weather-zipcodes.csv should be in the working dir, in data/ , or in pyva/data/'
27 exit(1)
28
29 # options
30 drop_table = '--drop' in sys.argv
31 debug = '--debug' in sys.argv
32
33 # config
34 with open(config_filename, "r") as config_fd:
35 config = yaml.load(config_fd)
36
37 # make db pointer
38 dbx = db.connect(
39 host=config.get('database').get('host'),
40 user=config.get('database').get('user'),
41 passwd=config.get('database').get('passwd'),
42 db=config.get('database').get('db'),
43 )
44 dbx.autocommit(True) # no need to have transactions
45 dbp = dbx.cursor()
46
47 # check if table exists
48 dbp.execute("SELECT count(*) FROM information_schema.tables t WHERE t.table_name='zipcode_citystate';")
49 if dbp.fetchone()[0] == 0:
50 table_exists = False
51 else:
52 table_exists = True
53 rows = 0 # zero rows since table doesn't exist
54
55 # clearing/setting table information
56 try:
57 # check if we should drop the table, to basically 'refresh'and start again
58 if drop_table:
59 if table_exists:
60 dbp.execute("DROP TABLE zipcode_citystate;")
61 print 'Current ZIP table dropped.'
62 table_exists = False
63 exit()
64 else:
65 print 'ZIP table does not exist, cannot drop.'
66 exit()
67
68 if table_exists:
69 # count current rows, see if we have any info in there already
70 dbp.execute("SELECT count(*) FROM zipcode_citystate;")
71 rows = int(dbp.fetchone()[0])
72 if rows > 0:
73 print 'ZIP codes already exist in database. Please run:'
74 print sys.argv[0], '--drop'
75 print 'in order to clear the current ZIP table and populate with new ZIP codes'
76 exit(1)
77 else:
78 print 'ZIP Table does not yet exist.'
79 except Exception, err:
80 print 'Error setting up initial table information: {}'.format(err)
81 raise
82
83 # create table
84 try:
85 dbp.execute("CREATE TABLE IF NOT EXISTS zipcode_citystate (zipcode INT(5), city VARCHAR(15) NOT NULL, state VARCHAR(2) NOT NULL, UNIQUE KEY (zipcode));")
86 print 'ZIP Table created.'
87 except Exception, err:
88 print 'Error creating database table zipcode_citystate: {}'.format(err)
89 raise
90
91 # load zipfile contents
92 if rows == 0:
93 with open(zipcode_csv_file, 'rb') as zipfile:
94 print 'Populating ZIP Table'
95
96 current_row = 0
97 first_line = True # contains row info, not necessary and breaks our int/float/etc
98
99 for line in zipfile:
100 if first_line:
101 first_line = False
102 continue
103
104 # insertion
105 line = line.replace('\n', '').replace('"', '').split(',')
106 if len(line) > 3:
107 zipcode, city, state, latitude, longitude, timezone, dst = line
108
109 try:
110 dbp.execute("INSERT INTO zipcode_citystate (zipcode, city, state) VALUES (%s,%s,%s);", [int(zipcode), city, state])
111 if debug:
112 print 'Inserted ZIP code', zipcode, ':', city, ':', state
113 except Exception, err:
114 print 'Error adding ZIP code to database: ({})'.format(err)
115 print line
116 print zipcode, city, state
117 raise
118
119 # status
120 current_row += 1
121 if current_row % 1000 == 0: # print every 1k rows
122 print current_row, 'rows inserted'
123
124 print current_row, 'rows inserted in total'