]> jfr.im git - irc/SurrealServices/srsv.git/blame - branches/0.4.3/utils/geoip.pl
geolite has changed a bit, so we update in kind
[irc/SurrealServices/srsv.git] / branches / 0.4.3 / utils / geoip.pl
CommitLineData
7b261bb8 1#!/usr/bin/perl
2
3# This file is part of SurrealServices.
4#
5# SurrealServices is free software; you can redistribute it and/or modify
6# it under the terms of the GNU General Public License as published by
7# the Free Software Foundation; either version 2 of the License, or
8# (at your option) any later version.
9#
10# SurrealServices is distributed in the hope that it will be useful,
11# but WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13# GNU General Public License for more details.
14#
15# You should have received a copy of the GNU General Public License
16# along with SurrealServices; if not, write to the Free Software
17# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
19# SurrealChat.net does not provide the Country/Allocation data,
20# is in no way associated with maxmind.com,
21# nor are we providing a license to download/use it.
22# Be sure to direct availability/accuracy/licensing questions to maxmind.com
23
24use strict;
25#use warnings;
26use DBI;
27
28BEGIN {
29 use Cwd qw( abs_path getcwd );
30 use File::Basename;
31 my %constants = (
32 CWD => getcwd(),
33 PREFIX => abs_path(dirname(abs_path($0)).'/..'),
34 );
35 require constant; import constant(\%constants);
36}
37#chdir PREFIX;
38use lib PREFIX;
39
40use Date::Parse;
41use Text::ParseWords; # is a standard (in 5.8) module
28982a43 42use Time::HiRes qw( time );
7b261bb8 43
2d25814c 44use SrSv::Conf::sql;
7b261bb8 45use SrSv::Conf2Consts qw( sql );
46use SrSv::Util qw( :say );
28982a43 47use SrSv::Time qw( split_time );
7b261bb8 48
49sub runSQL($@) {
50 my ($dbh, @strings) = @_;
51 foreach my $string (@strings) {
52 my $sql;
53 foreach my $x (split($/, $string)) { $sql .= $x unless $x =~ /^(#|--)/ or $x eq "\n"}
54# $dbh->do("START TRANSACTION");
55 my $printError = $dbh->{PrintError};
56 $dbh->{PrintError} = 0;
57 foreach my $line (split(/;/s, $sql)) {
58 next unless length($line);
59 #print "$line\n";
60 eval { $dbh->do($line); };
61 if($@) {
62 $line =~ s/\s{2,}/ /g;
63 $line =~ s/\n//g;
64 print "$line\n";
65 }
66
67 }
68 $dbh->{PrintError} = $printError;
69# $dbh->do("COMMIT");
70 }
71}
72
73BEGIN {
74 my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = gmtime();
75 $year += 1900;
76 $mon++; # gmtime returns months January=0
77 my $date = sprintf("%04d%02d01", $year, $mon);
78 require constant;
79 import constant {
80 #countrydb_url => 'http://www.maxmind.com/download/geoip/database/GeoIPCountryCSV.zip',
81 #FIXME: This needs a date generator!
777a5793 82 #countrydb_url => "http://www.maxmind.com/download/geoip/database/GeoLiteCity_CSV/GeoLiteCity_latest.zip",
83 countrydb_url => "http://geolite.maxmind.com/download/geoip/database/GeoLiteCity_CSV/GeoLiteCity-latest.zip",
7b261bb8 84 srcname => "GeoLiteCity_${date}.zip",
85 };
86}
87
88sub main() {
89 downloadData();
90 say "Connecting to database...";
91 my $dbh = dbConnect();
92 say "Creating new table...";
93 newTable($dbh);
94 say "Inserting data... ";
95 loadData($dbh);
28982a43 96 print "Converting geoip table...";
7b261bb8 97 convert($dbh);
98 cleanup($dbh);
99 $dbh->disconnect();
100 say "GeoIP update complete.";
101}
102
103main();
104exit 0;
105
106sub downloadData() {
107 # This MAY be implementable with an open of a pipe
108 # pipe the output of wget through gzip -d
109 # and then into the load-loop.
110 # It's a bit heavy to run directly from inside services however.
111 # I'd recommend it be run as a crontab script separate from services.
112
113 #return;
114 my ($stat, $date, $size);
115 my $srcPath = PREFIX.'/data/'.srcname;
116 say $srcPath;
117 use File::stat;
118 if($stat = stat($srcPath)) {
119 print "Checking for updated country data...\n";
777a5793 120 my $header = qx "wget --spider -S @{[countrydb_url]} -O @{[srcname]} 2>&1";
7b261bb8 121 ($date) = ($header =~ /Last-Modified: (.*)/);
122 ($size) = ($header =~ /Content-Length: (.*)/);
123 }
124
125 if($stat and $stat->size == $size and $stat->mtime >= str2time($date)) {
126 say "Country data is up to date.";
127 } else {
128# say $stat->size == $size;
129# say $stat->mtime >= str2time($date);
130 say "Downloading country data...";
131# return;
132
133 unlink $srcPath;
134 system('wget '.countrydb_url." -O $srcPath");
135 unless(-e $srcPath) {
136 sayERR "FATAL: Download failed.";
137 exit;
138 }
139 }
140
141 mkdir PREFIX.'/data/GeoIP/';
142 say "Decompressing...";
143 unlink(glob(PREFIX.'/data/GeoIP/Geo*.csv'));
144 system("unzip -j $srcPath -d ".PREFIX.'/data/GeoIP/');
145 unless(-f PREFIX.'/data/GeoIP/GeoLiteCity-Blocks.csv') {
146 sayERR "FATAL: Decompression failed.";
147 exit -1;
148 }
149}
150
151sub dbConnect() {
152 my $dbh;
153 eval {
154 $dbh = DBI->connect("DBI:mysql:".sql_conf_mysql_db, sql_conf_mysql_user, sql_conf_mysql_pass,
155 { AutoCommit => 1, RaiseError => 1 })
156 };
157
158 if($@) {
159 print STDERR "FATAL: Can't connect to database:\n$@\n";
160 print STDERR "You must edit config/sql.conf and create a corresponding\nMySQL user and database!\n\n";
161 exit -1;
162 }
163 return $dbh;
164}
165
166sub newTable($) {
167 my ($dbh) = @_;
168 $dbh->{RaiseError} = 1;
169 $dbh->{PrintError} = 1;
170
171 runSQL($dbh,
48686f9a 172 "CREATE TEMPORARY TABLE `tmp_geoip` (
79d7f63d 173 `low` int unsigned NOT NULL,
174 `high` int unsigned NOT NULL,
175 `location` mediumint(8) NOT NULL,
7b261bb8 176 PRIMARY KEY (`low`, `high`)
8a42633b 177 ) Engine=MyISAM",
7b261bb8 178
179 "DROP TABLE IF EXISTS new_geolocation",
180 #"locId,country,region,city,postalCode,latitude,longitude,metroCode,areaCode";
181 "CREATE TABLE `new_geolocation` (
79d7f63d 182 `id` mediumint(8) unsigned NOT NULL,
7b261bb8 183 `country` char(2) NOT NULL default '-',
184 `region` char(2) NOT NULL default '-',
185 `city` varchar(255) NOT NULL default '-',
186 `postalcode` varchar(6) NOT NULL default '-',
187 `latitude` float NOT NULL default 0.0,
188 `longitude` float NOT NULL default 0.0,
189 `metrocode` int unsigned NOT NULL default 0,
190 `areacode` int unsigned NOT NULL default 0,
191 PRIMARY KEY (`id`),
192 KEY `countrykey` (`country`)
8a42633b 193 ) Engine=MyISAM;",
b6943128 194
195 "DROP TABLE IF EXISTS `new_metrocode`",
7b261bb8 196 "CREATE TABLE `new_metrocode` (
197 `id` smallint NOT NULL default 0,
198 `metro` varchar(128) NOT NULL default '',
199 PRIMARY KEY (`id`)
8a42633b 200 ) Engine=MyISAM;",
7b261bb8 201
202 "DROP TABLE IF EXISTS `new_geocountry`",
203 #"locId,country,region,city,postalCode,latitude,longitude,metroCode,areaCode";
204 "CREATE TABLE `new_geocountry` (
205 `code` char(2) NOT NULL default '',
206 `country` varchar(255) default '',
207 PRIMARY KEY (`code`)
8a42633b 208 ) Engine=MyISAM;",
7b261bb8 209
210 "DROP TABLE IF EXISTS `new_georegion`",
211 #"locId,country,region,city,postalCode,latitude,longitude,metroCode,areaCode";
212 "CREATE TABLE `new_georegion` (
213 `country` char(2) NOT NULL default '',
214 `region` char(2) NOT NULL default '',
215 `name` varchar(255) default '',
216 PRIMARY KEY (`country`, `region`)
8a42633b 217 ) Engine=MyISAM;",
7b261bb8 218
219 );
220}
221
28982a43 222sub timeDiff($$) {
223 my ($time1, $time2) = @_;
224 my ($weeks, $days, $hours, $minutes, $seconds) = split_time($time2 - $time1);
2db74488 225 return sprintf("%02d:%02d.%02d", $minutes, int($seconds), 100*($seconds-int($seconds)));
28982a43 226}
227
7b261bb8 228sub loadData($) {
229 my ($dbh) = @_;
230 $| = 1;
231=cut
232 my $unpackPath = PREFIX.'/data/'.unpackname;
233 my ($lines) = qx{wc -l $unpackPath};
234 my $div = int($lines/100);
235=cut
236 my ($i, @entries);
237 my $fh;
238 my $table;
239
28982a43 240 my $time1 = time();
7b261bb8 241 print "Loading geoip data...";
242####### geoip #######
7b261bb8 243 $table = 'geoip';
48686f9a 244 $dbh->do("LOAD DATA LOCAL INFILE
245 '@{[PREFIX]}/data/GeoIP/GeoLiteCity-Blocks.csv'
246 INTO TABLE tmp_${table}
247 FIELDS TERMINATED BY ',' ENCLOSED BY '\"' IGNORE 2 LINES");
7b261bb8 248####### END geoip #######
28982a43 249 my $time2 = time();
250 print " Done. "; say timeDiff($time1, $time2);
7b261bb8 251
28982a43 252 $time1 = time();
7b261bb8 253 print "Loading location data...";
254####### locations #######
255 $table = 'geolocation';
48686f9a 256 $dbh->do("LOAD DATA LOCAL INFILE
257 '@{[PREFIX]}/data/GeoIP/GeoLiteCity-Location.csv'
258 INTO TABLE new_${table}
259 FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '\"' IGNORE 2 LINES");
7b261bb8 260####### END locations #######
28982a43 261 $time2 = time();
262 print " Done. "; say timeDiff($time1, $time2);
7b261bb8 263
264
28982a43 265 $time1 = time();
7b261bb8 266 print "Loading metrocode data...";
267####### metrocodes #######
268 open ($fh, '<', PREFIX.'/data/GeoIP/metrocodes.txt');
269 $table = 'metrocode';
48686f9a 270 my $columns = "(`id`, `metro`)";
7b261bb8 271
272 $dbh->do("ALTER TABLE `new_$table` DISABLE KEYS");
273
274 while(my $x = <$fh>) {
275 chomp $x;
276=cut
277 if($i == 0 or !($i % $div)) {
278 printf("\b\b\b\b%3d%", ($i/$lines)*100);
279 }
280=cut
281 my @args = map( { $dbh->quote($_) } split(' ', $x, 2) );
282 push @entries, '(' . join(',', @args) . ')' if scalar(@args) == 2;
283 if(scalar(@entries) >= 100) { #1000 only gives another 10% boost for 10x as much memory
b6943128 284 $dbh->do("INSERT INTO `new_$table` $columns VALUES ".join(',', @entries));
7b261bb8 285 @entries = ();
286 }
287
288 $i++;
289 }
b6943128 290 $dbh->do(("INSERT INTO `new_$table` $columns VALUES ".join(',', @entries))) if scalar(@entries);
7b261bb8 291 @entries = ();
292 $dbh->do("ALTER TABLE `new_$table` ENABLE KEYS");
293 close $fh;
294####### END metrocodes #######
28982a43 295 $time2 = time();
296 print " Done. "; say timeDiff($time1, $time2);
7b261bb8 297
28982a43 298 $time1 = time();
7b261bb8 299 print "Loading region data...";
300####### regions #######
301 $table = 'georegion';
302 $columns = "(`country`, `region`, `name`)";
303
48686f9a 304 $dbh->do("LOAD DATA LOCAL INFILE
305 '@{[PREFIX]}/data/fips10_4'
306 INTO TABLE new_${table}
307 FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '\"' IGNORE 1 LINES");
7b261bb8 308
48686f9a 309 $dbh->do("LOAD DATA LOCAL INFILE
310 '@{[PREFIX]}/data/iso3166_2'
311 INTO TABLE new_${table}
312 FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '\"' IGNORE 1 LINES");
7b261bb8 313
7b261bb8 314####### END regions #######
28982a43 315 $time2 = time();
316 print " Done. "; say timeDiff($time1, $time2);
7b261bb8 317
28982a43 318 $time1 = time();
7b261bb8 319 print "Loading country data...";
320####### iso3166 Country Names #######
7b261bb8 321 $table = 'geocountry';
48686f9a 322 $dbh->do("LOAD DATA LOCAL INFILE
323 '@{[PREFIX]}/data/iso3166'
324 INTO TABLE new_${table}
325 FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '\"' IGNORE 1 LINES");
7b261bb8 326####### END iso3166 Country Names #######
28982a43 327 $time2 = time();
328 print " Done. "; say timeDiff($time1, $time2);
7b261bb8 329
b6943128 330 $dbh->do("UNLOCK TABLES");
7b261bb8 331}
332
333sub convert($) {
334 my ($dbh) = @_;
335
28982a43 336 my $time1 = time();
7b261bb8 337 runSQL($dbh,
7b261bb8 338 "CREATE TABLE `new_geoip` (
79d7f63d 339 `low` int unsigned NOT NULL,
340 `high` int unsigned NOT NULL,
341 `location` mediumint(8) NOT NULL,
342 `ip_poly` polygon NOT NULL,
7b261bb8 343 PRIMARY KEY (`low`, `high`),
344 SPATIAL INDEX (`ip_poly`)
8a42633b 345 ) Engine=MyISAM",
7b261bb8 346 "ALTER TABLE `new_geoip` DISABLE KEYS",
b6943128 347 "INSERT INTO new_geoip (low,high,location,ip_poly)
7b261bb8 348 SELECT low, high, location,
349 GEOMFROMWKB(POLYGON(LINESTRING( POINT(low, -1), POINT(high, -1),
350 POINT(high, 1), POINT(low, 1), POINT(low, -1)))) FROM tmp_geoip;",
351 "ALTER TABLE `new_geoip` ENABLE KEYS",
352 "DROP TABLE IF EXISTS `tmp_geoip`",
353 );
28982a43 354 my $time2 = time();
355 print " Done. "; say timeDiff($time1, $time2);
356
7b261bb8 357}
358
359sub cleanup($) {
360 my ($dbh) = @_;
361
362# print "\b\b\b\bdone.\nRemoving old table...\n";
363 $dbh->do("DROP TABLE IF EXISTS `oldcountry`");
364 say "Renaming new tables...";
365 $dbh->{RaiseError} = 0;
366 $dbh->{PrintError} = 0;
367 $dbh->do("OPTIMIZE TABLE `new_geoip`");
368 $dbh->do("ANALYZE TABLE `new_geoip`");
369 # Doing the renames cannot be done atomically
370 # as sometimes `country` doesn't exist yet.
371 $dbh->do("START TRANSACTION");
372 $dbh->do("RENAME TABLE `geoip` TO `old_geoip`");
373 $dbh->do("RENAME TABLE `new_geoip` TO `geoip`");
374
375 $dbh->do("RENAME TABLE `geolocation` TO `old_geolocation`");
376 $dbh->do("RENAME TABLE `new_geolocation` TO `geolocation`");
377
378 $dbh->do("RENAME TABLE `metrocode` TO `old_metrocode`");
379 $dbh->do("RENAME TABLE `new_metrocode` TO `metrocode`");
380
381 $dbh->do("RENAME TABLE `georegion` TO `old_georegion`");
382 $dbh->do("RENAME TABLE `new_georegion` TO `georegion`");
383
384 $dbh->do("RENAME TABLE `geocountry` TO `old_geocountry`");
385 $dbh->do("RENAME TABLE `new_geocountry` TO `geocountry`");
386
387 $dbh->do("DROP TABLE `old_geoip`");
388 $dbh->do("DROP TABLE `old_geolocation`");
389 $dbh->do("DROP TABLE `old_metrocode`");
390 $dbh->do("DROP TABLE `old_georegion`");
391 $dbh->do("DROP TABLE `old_geocountry`");
392 $dbh->do("COMMIT");
393 #unlink PREFIX.'/data/'.unpackname;
394}