]>
Commit | Line | Data |
---|---|---|
7b261bb8 | 1 | #!/usr/bin/perl |
2 | ||
3 | # This file is part of SurrealServices. | |
4 | # | |
5 | # SurrealServices is free software; you can redistribute it and/or modify | |
6 | # it under the terms of the GNU General Public License as published by | |
7 | # the Free Software Foundation; either version 2 of the License, or | |
8 | # (at your option) any later version. | |
9 | # | |
10 | # SurrealServices is distributed in the hope that it will be useful, | |
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 | # GNU General Public License for more details. | |
14 | # | |
15 | # You should have received a copy of the GNU General Public License | |
16 | # along with SurrealServices; if not, write to the Free Software | |
17 | # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
18 | ||
19 | # SurrealChat.net does not provide the Country/Allocation data, | |
20 | # is in no way associated with maxmind.com, | |
21 | # nor are we providing a license to download/use it. | |
22 | # Be sure to direct availability/accuracy/licensing questions to maxmind.com | |
23 | ||
24 | use strict; | |
25 | #use warnings; | |
26 | use DBI; | |
27 | ||
28 | BEGIN { | |
29 | use Cwd qw( abs_path getcwd ); | |
30 | use File::Basename; | |
31 | my %constants = ( | |
32 | CWD => getcwd(), | |
33 | PREFIX => abs_path(dirname(abs_path($0)).'/..'), | |
34 | ); | |
35 | require constant; import constant(\%constants); | |
36 | } | |
37 | #chdir PREFIX; | |
38 | use lib PREFIX; | |
39 | ||
40 | use Date::Parse; | |
41 | use Text::ParseWords; # is a standard (in 5.8) module | |
28982a43 | 42 | use Time::HiRes qw( time ); |
7b261bb8 | 43 | |
2d25814c | 44 | use SrSv::Conf::sql; |
7b261bb8 | 45 | use SrSv::Conf2Consts qw( sql ); |
46 | use SrSv::Util qw( :say ); | |
28982a43 | 47 | use SrSv::Time qw( split_time ); |
7b261bb8 | 48 | |
49 | sub runSQL($@) { | |
50 | my ($dbh, @strings) = @_; | |
51 | foreach my $string (@strings) { | |
52 | my $sql; | |
53 | foreach my $x (split($/, $string)) { $sql .= $x unless $x =~ /^(#|--)/ or $x eq "\n"} | |
54 | # $dbh->do("START TRANSACTION"); | |
55 | my $printError = $dbh->{PrintError}; | |
56 | $dbh->{PrintError} = 0; | |
57 | foreach my $line (split(/;/s, $sql)) { | |
58 | next unless length($line); | |
59 | #print "$line\n"; | |
60 | eval { $dbh->do($line); }; | |
61 | if($@) { | |
62 | $line =~ s/\s{2,}/ /g; | |
63 | $line =~ s/\n//g; | |
64 | print "$line\n"; | |
65 | } | |
66 | ||
67 | } | |
68 | $dbh->{PrintError} = $printError; | |
69 | # $dbh->do("COMMIT"); | |
70 | } | |
71 | } | |
72 | ||
73 | BEGIN { | |
74 | my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = gmtime(); | |
75 | $year += 1900; | |
76 | $mon++; # gmtime returns months January=0 | |
77 | my $date = sprintf("%04d%02d01", $year, $mon); | |
78 | require constant; | |
79 | import constant { | |
80 | #countrydb_url => 'http://www.maxmind.com/download/geoip/database/GeoIPCountryCSV.zip', | |
81 | #FIXME: This needs a date generator! | |
777a5793 | 82 | #countrydb_url => "http://www.maxmind.com/download/geoip/database/GeoLiteCity_CSV/GeoLiteCity_latest.zip", |
83 | countrydb_url => "http://geolite.maxmind.com/download/geoip/database/GeoLiteCity_CSV/GeoLiteCity-latest.zip", | |
7b261bb8 | 84 | srcname => "GeoLiteCity_${date}.zip", |
85 | }; | |
86 | } | |
87 | ||
88 | sub main() { | |
89 | downloadData(); | |
90 | say "Connecting to database..."; | |
91 | my $dbh = dbConnect(); | |
92 | say "Creating new table..."; | |
93 | newTable($dbh); | |
94 | say "Inserting data... "; | |
95 | loadData($dbh); | |
28982a43 | 96 | print "Converting geoip table..."; |
7b261bb8 | 97 | convert($dbh); |
98 | cleanup($dbh); | |
99 | $dbh->disconnect(); | |
100 | say "GeoIP update complete."; | |
101 | } | |
102 | ||
103 | main(); | |
104 | exit 0; | |
105 | ||
106 | sub downloadData() { | |
107 | # This MAY be implementable with an open of a pipe | |
108 | # pipe the output of wget through gzip -d | |
109 | # and then into the load-loop. | |
110 | # It's a bit heavy to run directly from inside services however. | |
111 | # I'd recommend it be run as a crontab script separate from services. | |
112 | ||
113 | #return; | |
114 | my ($stat, $date, $size); | |
115 | my $srcPath = PREFIX.'/data/'.srcname; | |
116 | say $srcPath; | |
117 | use File::stat; | |
118 | if($stat = stat($srcPath)) { | |
119 | print "Checking for updated country data...\n"; | |
777a5793 | 120 | my $header = qx "wget --spider -S @{[countrydb_url]} -O @{[srcname]} 2>&1"; |
7b261bb8 | 121 | ($date) = ($header =~ /Last-Modified: (.*)/); |
122 | ($size) = ($header =~ /Content-Length: (.*)/); | |
123 | } | |
124 | ||
125 | if($stat and $stat->size == $size and $stat->mtime >= str2time($date)) { | |
126 | say "Country data is up to date."; | |
127 | } else { | |
128 | # say $stat->size == $size; | |
129 | # say $stat->mtime >= str2time($date); | |
130 | say "Downloading country data..."; | |
131 | # return; | |
132 | ||
133 | unlink $srcPath; | |
134 | system('wget '.countrydb_url." -O $srcPath"); | |
135 | unless(-e $srcPath) { | |
136 | sayERR "FATAL: Download failed."; | |
137 | exit; | |
138 | } | |
139 | } | |
140 | ||
141 | mkdir PREFIX.'/data/GeoIP/'; | |
142 | say "Decompressing..."; | |
143 | unlink(glob(PREFIX.'/data/GeoIP/Geo*.csv')); | |
144 | system("unzip -j $srcPath -d ".PREFIX.'/data/GeoIP/'); | |
145 | unless(-f PREFIX.'/data/GeoIP/GeoLiteCity-Blocks.csv') { | |
146 | sayERR "FATAL: Decompression failed."; | |
147 | exit -1; | |
148 | } | |
149 | } | |
150 | ||
151 | sub dbConnect() { | |
152 | my $dbh; | |
153 | eval { | |
154 | $dbh = DBI->connect("DBI:mysql:".sql_conf_mysql_db, sql_conf_mysql_user, sql_conf_mysql_pass, | |
155 | { AutoCommit => 1, RaiseError => 1 }) | |
156 | }; | |
157 | ||
158 | if($@) { | |
159 | print STDERR "FATAL: Can't connect to database:\n$@\n"; | |
160 | print STDERR "You must edit config/sql.conf and create a corresponding\nMySQL user and database!\n\n"; | |
161 | exit -1; | |
162 | } | |
163 | return $dbh; | |
164 | } | |
165 | ||
166 | sub newTable($) { | |
167 | my ($dbh) = @_; | |
168 | $dbh->{RaiseError} = 1; | |
169 | $dbh->{PrintError} = 1; | |
170 | ||
171 | runSQL($dbh, | |
48686f9a | 172 | "CREATE TEMPORARY TABLE `tmp_geoip` ( |
79d7f63d | 173 | `low` int unsigned NOT NULL, |
174 | `high` int unsigned NOT NULL, | |
175 | `location` mediumint(8) NOT NULL, | |
7b261bb8 | 176 | PRIMARY KEY (`low`, `high`) |
8a42633b | 177 | ) Engine=MyISAM", |
7b261bb8 | 178 | |
179 | "DROP TABLE IF EXISTS new_geolocation", | |
180 | #"locId,country,region,city,postalCode,latitude,longitude,metroCode,areaCode"; | |
181 | "CREATE TABLE `new_geolocation` ( | |
79d7f63d | 182 | `id` mediumint(8) unsigned NOT NULL, |
7b261bb8 | 183 | `country` char(2) NOT NULL default '-', |
184 | `region` char(2) NOT NULL default '-', | |
185 | `city` varchar(255) NOT NULL default '-', | |
186 | `postalcode` varchar(6) NOT NULL default '-', | |
187 | `latitude` float NOT NULL default 0.0, | |
188 | `longitude` float NOT NULL default 0.0, | |
189 | `metrocode` int unsigned NOT NULL default 0, | |
190 | `areacode` int unsigned NOT NULL default 0, | |
191 | PRIMARY KEY (`id`), | |
192 | KEY `countrykey` (`country`) | |
8a42633b | 193 | ) Engine=MyISAM;", |
b6943128 | 194 | |
195 | "DROP TABLE IF EXISTS `new_metrocode`", | |
7b261bb8 | 196 | "CREATE TABLE `new_metrocode` ( |
197 | `id` smallint NOT NULL default 0, | |
198 | `metro` varchar(128) NOT NULL default '', | |
199 | PRIMARY KEY (`id`) | |
8a42633b | 200 | ) Engine=MyISAM;", |
7b261bb8 | 201 | |
202 | "DROP TABLE IF EXISTS `new_geocountry`", | |
203 | #"locId,country,region,city,postalCode,latitude,longitude,metroCode,areaCode"; | |
204 | "CREATE TABLE `new_geocountry` ( | |
205 | `code` char(2) NOT NULL default '', | |
206 | `country` varchar(255) default '', | |
207 | PRIMARY KEY (`code`) | |
8a42633b | 208 | ) Engine=MyISAM;", |
7b261bb8 | 209 | |
210 | "DROP TABLE IF EXISTS `new_georegion`", | |
211 | #"locId,country,region,city,postalCode,latitude,longitude,metroCode,areaCode"; | |
212 | "CREATE TABLE `new_georegion` ( | |
213 | `country` char(2) NOT NULL default '', | |
214 | `region` char(2) NOT NULL default '', | |
215 | `name` varchar(255) default '', | |
216 | PRIMARY KEY (`country`, `region`) | |
8a42633b | 217 | ) Engine=MyISAM;", |
7b261bb8 | 218 | |
219 | ); | |
220 | } | |
221 | ||
28982a43 | 222 | sub timeDiff($$) { |
223 | my ($time1, $time2) = @_; | |
224 | my ($weeks, $days, $hours, $minutes, $seconds) = split_time($time2 - $time1); | |
2db74488 | 225 | return sprintf("%02d:%02d.%02d", $minutes, int($seconds), 100*($seconds-int($seconds))); |
28982a43 | 226 | } |
227 | ||
7b261bb8 | 228 | sub loadData($) { |
229 | my ($dbh) = @_; | |
230 | $| = 1; | |
231 | =cut | |
232 | my $unpackPath = PREFIX.'/data/'.unpackname; | |
233 | my ($lines) = qx{wc -l $unpackPath}; | |
234 | my $div = int($lines/100); | |
235 | =cut | |
236 | my ($i, @entries); | |
237 | my $fh; | |
238 | my $table; | |
239 | ||
28982a43 | 240 | my $time1 = time(); |
7b261bb8 | 241 | print "Loading geoip data..."; |
242 | ####### geoip ####### | |
7b261bb8 | 243 | $table = 'geoip'; |
48686f9a | 244 | $dbh->do("LOAD DATA LOCAL INFILE |
245 | '@{[PREFIX]}/data/GeoIP/GeoLiteCity-Blocks.csv' | |
246 | INTO TABLE tmp_${table} | |
247 | FIELDS TERMINATED BY ',' ENCLOSED BY '\"' IGNORE 2 LINES"); | |
7b261bb8 | 248 | ####### END geoip ####### |
28982a43 | 249 | my $time2 = time(); |
250 | print " Done. "; say timeDiff($time1, $time2); | |
7b261bb8 | 251 | |
28982a43 | 252 | $time1 = time(); |
7b261bb8 | 253 | print "Loading location data..."; |
254 | ####### locations ####### | |
255 | $table = 'geolocation'; | |
48686f9a | 256 | $dbh->do("LOAD DATA LOCAL INFILE |
257 | '@{[PREFIX]}/data/GeoIP/GeoLiteCity-Location.csv' | |
258 | INTO TABLE new_${table} | |
259 | FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '\"' IGNORE 2 LINES"); | |
7b261bb8 | 260 | ####### END locations ####### |
28982a43 | 261 | $time2 = time(); |
262 | print " Done. "; say timeDiff($time1, $time2); | |
7b261bb8 | 263 | |
264 | ||
28982a43 | 265 | $time1 = time(); |
7b261bb8 | 266 | print "Loading metrocode data..."; |
267 | ####### metrocodes ####### | |
268 | open ($fh, '<', PREFIX.'/data/GeoIP/metrocodes.txt'); | |
269 | $table = 'metrocode'; | |
48686f9a | 270 | my $columns = "(`id`, `metro`)"; |
7b261bb8 | 271 | |
272 | $dbh->do("ALTER TABLE `new_$table` DISABLE KEYS"); | |
273 | ||
274 | while(my $x = <$fh>) { | |
275 | chomp $x; | |
276 | =cut | |
277 | if($i == 0 or !($i % $div)) { | |
278 | printf("\b\b\b\b%3d%", ($i/$lines)*100); | |
279 | } | |
280 | =cut | |
281 | my @args = map( { $dbh->quote($_) } split(' ', $x, 2) ); | |
282 | push @entries, '(' . join(',', @args) . ')' if scalar(@args) == 2; | |
283 | if(scalar(@entries) >= 100) { #1000 only gives another 10% boost for 10x as much memory | |
b6943128 | 284 | $dbh->do("INSERT INTO `new_$table` $columns VALUES ".join(',', @entries)); |
7b261bb8 | 285 | @entries = (); |
286 | } | |
287 | ||
288 | $i++; | |
289 | } | |
b6943128 | 290 | $dbh->do(("INSERT INTO `new_$table` $columns VALUES ".join(',', @entries))) if scalar(@entries); |
7b261bb8 | 291 | @entries = (); |
292 | $dbh->do("ALTER TABLE `new_$table` ENABLE KEYS"); | |
293 | close $fh; | |
294 | ####### END metrocodes ####### | |
28982a43 | 295 | $time2 = time(); |
296 | print " Done. "; say timeDiff($time1, $time2); | |
7b261bb8 | 297 | |
28982a43 | 298 | $time1 = time(); |
7b261bb8 | 299 | print "Loading region data..."; |
300 | ####### regions ####### | |
301 | $table = 'georegion'; | |
302 | $columns = "(`country`, `region`, `name`)"; | |
303 | ||
48686f9a | 304 | $dbh->do("LOAD DATA LOCAL INFILE |
305 | '@{[PREFIX]}/data/fips10_4' | |
306 | INTO TABLE new_${table} | |
307 | FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '\"' IGNORE 1 LINES"); | |
7b261bb8 | 308 | |
48686f9a | 309 | $dbh->do("LOAD DATA LOCAL INFILE |
310 | '@{[PREFIX]}/data/iso3166_2' | |
311 | INTO TABLE new_${table} | |
312 | FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '\"' IGNORE 1 LINES"); | |
7b261bb8 | 313 | |
7b261bb8 | 314 | ####### END regions ####### |
28982a43 | 315 | $time2 = time(); |
316 | print " Done. "; say timeDiff($time1, $time2); | |
7b261bb8 | 317 | |
28982a43 | 318 | $time1 = time(); |
7b261bb8 | 319 | print "Loading country data..."; |
320 | ####### iso3166 Country Names ####### | |
7b261bb8 | 321 | $table = 'geocountry'; |
48686f9a | 322 | $dbh->do("LOAD DATA LOCAL INFILE |
323 | '@{[PREFIX]}/data/iso3166' | |
324 | INTO TABLE new_${table} | |
325 | FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '\"' IGNORE 1 LINES"); | |
7b261bb8 | 326 | ####### END iso3166 Country Names ####### |
28982a43 | 327 | $time2 = time(); |
328 | print " Done. "; say timeDiff($time1, $time2); | |
7b261bb8 | 329 | |
b6943128 | 330 | $dbh->do("UNLOCK TABLES"); |
7b261bb8 | 331 | } |
332 | ||
333 | sub convert($) { | |
334 | my ($dbh) = @_; | |
335 | ||
28982a43 | 336 | my $time1 = time(); |
7b261bb8 | 337 | runSQL($dbh, |
7b261bb8 | 338 | "CREATE TABLE `new_geoip` ( |
79d7f63d | 339 | `low` int unsigned NOT NULL, |
340 | `high` int unsigned NOT NULL, | |
341 | `location` mediumint(8) NOT NULL, | |
342 | `ip_poly` polygon NOT NULL, | |
7b261bb8 | 343 | PRIMARY KEY (`low`, `high`), |
344 | SPATIAL INDEX (`ip_poly`) | |
8a42633b | 345 | ) Engine=MyISAM", |
7b261bb8 | 346 | "ALTER TABLE `new_geoip` DISABLE KEYS", |
b6943128 | 347 | "INSERT INTO new_geoip (low,high,location,ip_poly) |
7b261bb8 | 348 | SELECT low, high, location, |
349 | GEOMFROMWKB(POLYGON(LINESTRING( POINT(low, -1), POINT(high, -1), | |
350 | POINT(high, 1), POINT(low, 1), POINT(low, -1)))) FROM tmp_geoip;", | |
351 | "ALTER TABLE `new_geoip` ENABLE KEYS", | |
352 | "DROP TABLE IF EXISTS `tmp_geoip`", | |
353 | ); | |
28982a43 | 354 | my $time2 = time(); |
355 | print " Done. "; say timeDiff($time1, $time2); | |
356 | ||
7b261bb8 | 357 | } |
358 | ||
359 | sub cleanup($) { | |
360 | my ($dbh) = @_; | |
361 | ||
362 | # print "\b\b\b\bdone.\nRemoving old table...\n"; | |
363 | $dbh->do("DROP TABLE IF EXISTS `oldcountry`"); | |
364 | say "Renaming new tables..."; | |
365 | $dbh->{RaiseError} = 0; | |
366 | $dbh->{PrintError} = 0; | |
367 | $dbh->do("OPTIMIZE TABLE `new_geoip`"); | |
368 | $dbh->do("ANALYZE TABLE `new_geoip`"); | |
369 | # Doing the renames cannot be done atomically | |
370 | # as sometimes `country` doesn't exist yet. | |
371 | $dbh->do("START TRANSACTION"); | |
372 | $dbh->do("RENAME TABLE `geoip` TO `old_geoip`"); | |
373 | $dbh->do("RENAME TABLE `new_geoip` TO `geoip`"); | |
374 | ||
375 | $dbh->do("RENAME TABLE `geolocation` TO `old_geolocation`"); | |
376 | $dbh->do("RENAME TABLE `new_geolocation` TO `geolocation`"); | |
377 | ||
378 | $dbh->do("RENAME TABLE `metrocode` TO `old_metrocode`"); | |
379 | $dbh->do("RENAME TABLE `new_metrocode` TO `metrocode`"); | |
380 | ||
381 | $dbh->do("RENAME TABLE `georegion` TO `old_georegion`"); | |
382 | $dbh->do("RENAME TABLE `new_georegion` TO `georegion`"); | |
383 | ||
384 | $dbh->do("RENAME TABLE `geocountry` TO `old_geocountry`"); | |
385 | $dbh->do("RENAME TABLE `new_geocountry` TO `geocountry`"); | |
386 | ||
387 | $dbh->do("DROP TABLE `old_geoip`"); | |
388 | $dbh->do("DROP TABLE `old_geolocation`"); | |
389 | $dbh->do("DROP TABLE `old_metrocode`"); | |
390 | $dbh->do("DROP TABLE `old_georegion`"); | |
391 | $dbh->do("DROP TABLE `old_geocountry`"); | |
392 | $dbh->do("COMMIT"); | |
393 | #unlink PREFIX.'/data/'.unpackname; | |
394 | } |