]> projects.mako.cc - selectricity/blob - vendor/plugins/sitealizer/lib/sitealizer/parser.rb
Merge from jdong; new webstats
[selectricity] / vendor / plugins / sitealizer / lib / sitealizer / parser.rb
1 # Copyright (c) 2007 Thiago Jackiw
2
3 # Permission is hereby granted, free of charge, to any person obtaining a copy
4 # of this software and associated documentation files (the "Software"), to deal
5 # in the Software without restriction, including without limitation the rights
6 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 # copies of the Software, and to permit persons to whom the Software is
8 # furnished to do so, subject to the following conditions:
9
10 # The above copyright notice and this permission notice shall be included in all
11 # copies or substantial portions of the Software.
12
13 # The "Created with Sitealizer" footer text should not be removed from the 
14 # locations where it's currently shown (under the '/sitealizer' controller)
15
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 # SOFTWARE.
23
require 'cgi'
require 'uri'
25
26 module Sitealizer
27   
  # The Parser module is used only when processing the stats, not
  # when saving the user info, so it does not affect your site's
  # performance.
31   # 
32   module Parser
33     
34     class UserAgent
35       
36       # Process the user agent string and returns 
37       # the users's platform:
38       # 
39       #   Sitealizer::Parser::UserAgent.get_platform("(Macintosh; U; PPC Mac OS X; en)")
40       #   => "Macintosh"
41       # 
42       def self.get_platform(user_agent)
43         platform = nil
44         if user_agent =~ /Win/i
45           platform = "Windows"
46         elsif user_agent =~ /Mac/i
47           platform = "Macintosh"
48         elsif user_agent =~ /Linux/i
49           platform = "Linux";
50         elsif user_agent =~ /SunOS/i
51           platform = "Sun Solaris";
52         elsif user_agent =~ /BSD/i
53           platform = "FreeBSD";
54         else
55           platform = "Other"
56         end
57         return platform
58       end
59
60       # Process the user agent string and returns
61       # the user's browser info as a hash:
62       # 
63       #   user_agent = "Mozilla/5.0 (Windows; U; Windows NT 5.1; nl; rv:1.8) Gecko/20051107 Firefox/1.5"
64       #   Sitealizer::Parser::UserAgent.browser_info(user_agent)
65       #   => {:type => 'Firefox', :version => '1.5'}
66       # 
67       def self.browser_info(user_agent)
68         browser = {
69           :type => nil,
70           :version => nil
71         }
72         #Internet Exlorer
73         if user_agent =~ /MSIE/i && user_agent.scan(/AOL|America Online Browser/i).empty?
74           browser[:type] = "MSIE";
75           browser[:version] = user_agent.scan(/MSIE ([\d\.]+)/i).to_s
76         #Firefox/Firebird/Phoenix
77         elsif user_agent =~ /Firefox|Firebird|Phoenix/i
78           browser[:type] = "Firefox";
79           browser[:version] = user_agent.scan(/[Firefox|Firebird|Phoenix].\/(\d.+)/i).to_s
80         #Galeon
81         elsif user_agent =~ /Galeon/i
82           browser[:type] = "Galeon";
83           browser[:version] = user_agent.scan(/Galeon\/([\d\.]+)/i).to_s
84         #Safari
85         elsif user_agent =~ /Safari/i
86           browser[:type] = "Safari";
87           browser[:version] = nil
88         #Opera
89         elsif user_agent =~ /Opera/i
90           browser[:type] = "Opera";
91           browser[:version] = user_agent.scan(/Opera[ |\/]([\d\.]+)/i).to_s
92         #AOL/America Online Browser
93         elsif user_agent =~ /AOL|America Online Browser/i
94           browser[:type] = "AOL"
95           browser[:version] = if user_agent =~ /AOL/i
96               user_agent.scan(/AOL[ |\/]([\d.]+)/i).uniq.to_s
97             else
98               user_agent.scan(/America Online Browser ([\d\.]+)/i).to_s
99            end
100         #Camino
101         elsif user_agent =~ /Camino/i
102           browser[:type] = "Camino";
103           browser[:version] = user_agent.scan(/Camino\/([\d\.]+)/i).to_s
104         #Konqueror
105         elsif user_agent =~ /Konqueror/i
106           browser[:type] = "Konqueror";
107           browser[:version] = user_agent.scan(/Konqueror\/([\d.]+)/i).to_s
108         #K-Meleon
109         elsif user_agent =~ /K-Meleon/i
110           browser[:type] = "K-Meleon";
111           browser[:version] = user_agent.scan(/K-Meleon\/([\d.]+)/i).to_s
112         #Firefox BonEcho
113         elsif user_agent =~ /BonEcho/i
114           browser[:type] = "Firefox BonEcho";
115           browser[:version] = user_agent.scan(/BonEcho\/([\d.]+)/i).to_s
116         #Netscape
117         elsif user_agent =~ /Netscape/i
118           browser[:type] = "Netscape";
119           browser[:version] = user_agent.scan(/Netscape\/([\d.]+)/i).to_s
120         #PSP
121         elsif user_agent =~ /PlayStation Portable/i
122           browser[:type] = "PlayStation Portable (PSP)";
123           browser[:version] = user_agent.scan(/PlayStation Portable\); ([\d\.]+)/i).to_s
124         #PlayStation 3
125         elsif user_agent =~ /PlayStation 3/i
126           browser[:type] = "PlayStation 3";
127           browser[:version] = user_agent.scan(/PlayStation 3; ([\d\.]+)/i).to_s
128         #Lynx
129         elsif user_agent =~ /Lynx/i
130           browser[:type] = "Lynx";
131           browser[:version] = user_agent.scan(/Lynx\/([\d\.]+)/i).to_s
132         else
133           browser[:type] = "Other";
134           browser[:version] = nil
135         end
136         return browser
137       end
138     end
139    
140     class Keyword
141       
142       # Process the referrers and returns
143       # the search terms if they're available:
144       # 
145       #   referer = "http://search.msn.com/results.aspx?srch=105&FORM=AS5&q=sitealizer"
146       #   Sitealizer::Parser::Keyword.get_terms(referer)
147       #   => 'sitealizer'
148       #
149       def self.get_terms(string)
150         return if string.nil?
151         begin
152           search_string = nil
153           domain = URI::split(string)[2]
154           if domain =~ /[google|alltheweb|search\.msn|ask|altavista|]\./ && string =~ /[?|&]q=/i
155             search_string = CGI.unescape(string.scan(/[?|&]q=([^&]*)/).flatten.to_s)
156           elsif domain =~ /yahoo\./i && string =~ /[?|&]p=/i
157             search_string = CGI.unescape(string.scan(/[?|&]p=([^&]*)/).flatten.to_s)
158           elsif domain =~ /search\.aol\./i && string =~ /[?|&]query=/i
159             search_string = CGI.unescape(string.scan(/[?|&]query=([^&]*)/).flatten.to_s)
160           end
161           return search_string          
162         rescue
163           return nil
164         end    
165       end
166       
167       # Process the referrers and returns the referer domain. 
168       # <em>Host</em> is your site's url (request.host) but 
169       # you don't have to worry about this, it's all handled
170       # by the sitealizer controller
171       # 
172       #   referer = "http://search.msn.com/results.aspx?srch=105&FORM=AS5&q=sitealizer"
173       #   Sitealizer::Parser::Keyword.get_domain(referer, 'localhost')
174       #   => "search.msn.com"
175       #
176       def self.get_domain(string, host)
177         return if string.nil?
178         domain = nil
179         domain = URI::split(string)[2]
180         return domain != host ? domain : nil
181       end
182     end
183     
184     class Robot
185       
186       # Process the robots (when available) found on the
187       # user agent strings and returns its name:
188       # 
189       #   user_agent = "Googlebot/2.X (+http://www.googlebot.com/bot.html)"
190       #   Sitealizer::Parser::Robot.get_name(user_agent)
191       #   => "Googlebot"
192       #      
193       def self.get_name(agent)
194         robot = nil
195         if agent =~ /Atomz/i
196           robot = 'Atomz.com'
197         elsif agent =~ /Googlebot/i
198           robot = 'Googlebot'
199         elsif agent =~ /InfoSeek/i
200           robot = 'InfoSeek'
201         elsif agent =~ /Ask Jeeves/i
202           robot = 'Ask Jeeves'
203         elsif agent =~ /Lycos/i
204           robot = 'Lycos'
205         elsif agent =~ /MSNBOT/i
206           robot = 'MSNBot'
207         elsif agent =~ /Slurp/i && agent.scan(/Yahoo/i).empty?
208           robot = 'Inktomi'
209         elsif agent =~ /Yahoo/i
210           robot = 'Yahoo Slurp'
211         end
212         return robot
213       end      
214     end
215     
216     class Language
217
218       # Process the user language and returns the
219       # 'human readable' format if found, otherwise
220       # its raw code gets returned:
221       # 
222       #   Sitealizer::Parser::Language.get_name('en-us')
223       #   => "English/United States"
224       #
225       def self.get_name(lang)
226         lang = lang.scan(/([^,;].*)/).to_s
227         lang = lang.slice(0,5)
228         languages = {"af" => "Afrikaans",
229                      "sq" => "Albanian",
230                      "eu" => "Basque",
231                      "bg" => "Bulgarian",
232                      "be" => "Byelorussian",
233                      "ca" => "Catalan",
234                      "zh" => "Chinese",
235                      "zh-cn" => "Chinese/China",
236                      "zh-tw" => "Chinese/Taiwan",
237                      "zh-hk" => "Chinese/Hong Kong",
238                      "zh-sg" => "Chinese/singapore",
239                      "hr" => "Croatian",
240                      "cs" => "Czech",
241                      "da" => "Danish",
242                      "nl" => "Dutch",
243                      "nl-nl" => "Dutch/Netherlands",
244                      "nl-be" => "Dutch/Belgium",
245                      "en" => "English",
246                      "en-gb" => "English/United Kingdom",
247                      "en-us" => "English/United States",
248                      "en-au" => "English/Australian",
249                      "en-ca" => "English/Canada",
250                      "en-nz" => "English/New Zealand",
251                      "en-ie" => "English/Ireland",
252                      "en-za" => "English/South Africa",
253                      "en-jm" => "English/Jamaica",
254                      "en-bz" => "English/Belize",
255                      "en-tt" => "English/Trinidad",
256                      "et" => "Estonian",
257                      "fo" => "Faeroese",
258                      "fa" => "Farsi",
259                      "fi" => "Finnish",
260                      "fr" => "French",
261                      "fr-be" => "French/Belgium",
262                      "fr-fr" => "French/France",
263                      "fr,fr" => "French/France",
264                      "fr-ch" => "French/Switzerland",
265                      "fr-ca" => "French/Canada",
266                      "fr-lu" => "French/Luxembourg",
267                      "gd" => "Gaelic",
268                      "gl" => "Galician",
269                      "de" => "German",
270                      "de-at" => "German/Austria",
271                      "de-de" => "German/Germany",
272                      "de-ch" => "German/Switzerland",
273                      "de-lu" => "German/Luxembourg",
274                      "de-li" => "German/Liechtenstein",
275                      "el" => "Greek",
276                      "he" => "Hebrew",
277                      "he-il" => "Hebrew/Israel",
278                      "hi" => "Hindi",
279                      "hu" => "Hungarian",
280                      "ie-ee" => "Internet Explorer/Easter Egg",
281                      "is" => "Icelandic",
282                      "id" => "Indonesian",
283                      "in" => "Indonesian",
284                      "ga" => "Irish",
285                      "it" => "Italian",
286                      "it-ch" => "Italian/ Switzerland",
287                      "ja" => "Japanese",
288                      "ko" => "Korean",
289                      "lv" => "Latvian",
290                      "lt" => "Lithuanian",
291                      "mk" => "Macedonian",
292                      "ms" => "Malaysian",
293                      "mt" => "Maltese",
294                      "no" => "Norwegian",
295                      "pl" => "Polish",
296                      "pt-pt" => "Portuguese",
297                      "pt-br" => "Portuguese/Brazil",
298                      "rm" => "Rhaeto-Romanic",
299                      "ro" => "Romanian",
300                      "ro-mo" => "Romanian/Moldavia",
301                      "ru-ru" => "Russian",
302                      "ru-mo" => "Russian /Moldavia",
303                      "gd" => "Scots Gaelic",
304                      "sr" => "Serbian",
305                      "sk" => "Slovack",
306                      "sl" => "Slovenian",
307                      "sb" => "Sorbian",
308                      "es" => "Spanish",
309                      "es-do" => "Spanish",
310                      "es-ar" => "Spanish/Argentina",
311                      "es-co" => "Spanish/Colombia",
312                      "es-mx" => "Spanish/Mexico",
313                      "es-es" => "Spanish/Spain",
314                      "es-gt" => "Spanish/Guatemala",
315                      "es-cr" => "Spanish/Costa Rica",
316                      "es-pa" => "Spanish/Panama",
317                      "es-ve" => "Spanish/Venezuela",
318                      "es-pe" => "Spanish/Peru",
319                      "es-ec" => "Spanish/Ecuador",
320                      "es-cl" => "Spanish/Chile",
321                      "es-uy" => "Spanish/Uruguay",
322                      "es-py" => "Spanish/Paraguay",
323                      "es-bo" => "Spanish/Bolivia",
324                      "es-sv" => "Spanish/El salvador",
325                      "es-hn" => "Spanish/Honduras",
326                      "es-ni" => "Spanish/Nicaragua",
327                      "es-pr" => "Spanish/Puerto Rico",
328                      "sx" => "Sutu",
329                      "sv" => "Swedish",
330                      "sv-se" => "Swedish/Sweden",
331                      "sv-fi" => "Swedish/Finland",
332                      "ts" => "Thai",
333                      "tn" => "Tswana",
334                      "tr" => "Turkish",
335                      "uk" => "Ukrainian",
336                      "ur" => "Urdu",
337                      "vi" => "Vietnamese",
338                      "xh" => "Xshosa",
339                      "ji" => "Yiddish",
340                      "zu" => "Zulu"}
341         return languages.fetch(lang, lang) 
342       end
343     end
344     
345   end
346   
347 end

Benjamin Mako Hill || Want to submit a patch?