From 5a3f7527db9339bc0ae078394bef4bc4e6f76188 Mon Sep 17 00:00:00 2001 From: Hideo Hattori <hattori-h@klab.com> Date: Tue, 10 May 2016 12:53:28 +0900 Subject: [PATCH 1/4] wip --- Cargo.toml | 1 + src/.dataset.rs.swp | Bin 0 -> 49152 bytes src/lib.rs | 2 ++ src/parser.rs | 9 ++++++--- 4 files changed, 9 insertions(+), 3 deletions(-) create mode 100644 src/.dataset.rs.swp diff --git a/Cargo.toml b/Cargo.toml index 1a88620..95aef7a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,3 +10,4 @@ documentation = "http://hhatto.github.io/woothee-rust/woothee/" [dependencies] regex = "0.1" +lazy_static = "0.2.1" diff --git a/src/.dataset.rs.swp b/src/.dataset.rs.swp new file mode 100644 index 0000000000000000000000000000000000000000..d6e7bb6c0a323b224d6a73dfe767b97ccbeccefa GIT binary patch literal 49152 zcmeI53zQVqoyHqqL0r^D6E&`DNuq#p9y}y4EGjc20?rG1oB>TF)!j8SmFcN!tEw3W z5FgnXSB+;miW-l3cqAyU=w{<4Ml|bc6kS)@Wlwx$kHl!q8ul1hRuS36?C<telc61~ ze=#-4^r<@MN7K{O|C;-Kb*uWjx2if--8R2b`(ov^1k16B#D8Brx&5;Bj~&vMN+h~; z&$At~qD#H@F-xW^ovWB!T`^^5#gr)(Q>IO?%owSBXQl1x-FjtrR!>#hPG=&qdQZWs zUfb#BAKU9Pj0z|3dR4BIuFB}1?iyaD<9^KA;+J9tVg*7gkoP(&W{gctoie#vz4vqD zPt%SYcTwmv@f%_VVg+IaVg+IaVg+IaVg+IaVg*L70$y%R;tKx!L;NNb6Tv9|dGYZu z|9R4XY%6*FaR2#2|8Yaf>qq#{oBYS!C9i+Vf8O9f&huAXT>ohQ=gt1(%_XlNS@Qbp zC9jY1UvKdrr}zyn)<3G`^{Y!>Kh%G{sMFSx*Zr5{hggAFfmnf9fmnf9fmnf9fmnf9 zfmnf9fmnfqQUN`aNF2joc7+PU%>Nhj|4;bg?QjLGhS~7`(TT)P*aG)~1v6n3Y#EzK zJOpdNg4r+{{_W2aiSNKS;8K_cqv1aY+~0vLTm)yqk?;aR{Z8nCS@3z-O}Kv%?g9&D zz!C5^g94jj9XK!>j)WH(On3~|f&;T*G(68B#2xT;xCCavVX%e4i(A2fCYT6^!;=hp z+z6MzBp3(V83@=6e+x_C40xBZfLGu_SOZI7JiLvjkHUIT`mTnOMPn(R^<Y}A;dm3r zx0}|C@s*yv)b$+G>YO;~tc3ctlI8MAvl1mgDn6~OZ0|p_Vv*tIvtCV2#^})b&%{al zdPiFKj85C>tI@{i(!uLiD@r+bFMsb+uP<kWc$=m7s9&dz8Q!nWsGJ<Uy1iPXtt>QR zFwrEn%0srv)sd+tDODb#UDR}8Q*(Qh)@C@7sTW)*Qtg%*bQ_2M)11`${~!DZ4ANO; zZz^_GBel#ldV}91>Za0S!z0^V!*WbVn{TBngWEGy;gVUy%vokc8=Jh^@JQF3GaP;R zMH8+@vaJbA4Ub%n_6jqbJph8uH65eFULNd>L^5%);Sp_4E|)ceTQcO1sdwm(8N42e zVo71G;SuZt+wMFNiq&;Fc8|ariDJ@P!=qTEnRaZ~?(l-`AL?dyex@@b$z+vAykPS! z&u}cm)6QR>%i2y<AJZC>^Up7FJE6SIvZkKASIl2E+C1IuYSeRol`Q10v$Vao=5w^! z#b2nYv5ek{<gZ*lrFB?Zr{OL2Bi2$smrR_bRh(1&{e2Z&nMmYaL(Az7L8_)^uhq>e zyySYBnwqqo%^GRXv@N%$#{U~<C90~l)-Kc4I?Sx0@u=rLyQ0&uI7)j)<}_`N)u*Mq zbgR>FHP6<(J-^$_aY;)VTGn>G(-JEGKTo#b0V@9!TMNs({?Fb~KaZDKfmnf9fmnf9 zfmnf9fmnf9fmnf9fmnf9fmneNroiQe!4H3&qLBZOrv<Cj+%);Wcz*vY<o>%M2WsyB zD0r87{^#Ic@L)EKftQ%)|7W-bT$l-mz!v8DzYW(waqj;x{{I~F{I^3FEQASg7(B~d z|IMI76`TlfGtd98um&!M3Gg0s{M+D1a1(q9PKO=L@vC|N4R94)3KQUenB#vGt_Keq zU=r+Lj(-ze3vF;N91rg>&;KCgp$107JIwPx1M9(tYIu*qk|*FkxCYWt1-sDlNw^7= zt}9`%MGZogERl}k9q?KGMm?=HCl{9twv{wdJ>6@lYacWEqC&%^m)oN|Uarfw1S7br zK_zEHgnfH)s?|R$qONqjQmvw7kx0l{7LrosDPoIqkIgZftq~~;1%s;%mzoFm#GILD z2e(wX2}(+piD-iJwxps7N|lFbje=dQP&7%Y@(``5Rhw2B-AQ0gt<!?LdwD1~rMfyg z-DJheL%78)!JCM%pGr!VhiGl4>j_Q*66z+bRvyxQF|y4~P^mm56MSYU-O|+j|4hQ^ zw^T+Y{}->{e~G;RkFWyj;8@s3*53@@g;lTsj)Pap`uD@Fa2YhgU&4Qp^&f<*p&QPF zQ{YXq{tsX^G{D(#6ue2^e-QFe1Eb*`^8VBCeQ;nZ>>}$w1rNZrV8AqZkF5VNTnm@M z0#NyXCt3f;unHEyR2U1}$@&k&N;nUWhWE+(&%!-$8BBrq$@)*i{csgr3KQT>vi?R` z2^lyW#=%Qysr0;UAGv?9v4D^KtR(IgpGFfjYqNT~yVh`=K237|cj-o>{+QAe@c&d& zQgOKYTiD<owVcuHv;L%IaJ}+U(mw$;ve@p|lZ*nCmyo8U$&!98DpY3Lwb+^9jwcdN zRH@w5(<4()QmNe3lMOn{wmw;{@=|b)m2qq{qD@d-Y`8Q$Fr&Wx=j2Cq$dFeX*{anh z>!UeLhzgBttr|=#zdRbXgr!EdUj0+4T3uFmT~lyLw&Zv~&5v|(mH(>=pMUM=|0X|( zK1trc306T8s^Jf0{YT+iXn>R9x8(e-uo13<Bpe5?k@MF>A6x*{a5%h1&R-85PzC=> z#(xZMgkETbGhrt=|3`2o)WbM<m5l!k+yz&{JQxixlkvX|HmGa;&w$^P?_Y-Rg8|cF zH~Ic)xCO3&Bviw0GXBGGJv76~@LTfzR`@Qgf^*<Qvi%mg3EJUwI2v9>J9VAE((r3a z$B;`g+m}50lfq3Vm&I_|uQs(lqPfgsMNz>>yOHg3?Q)}?QrE1sI=bbi?Tq9yAEnHI zvl)5{6+%qU@40R|cn_pjRWd3NdUd{Y`_WKQH!ie3jC|31nC$CGnY#6n-HB3fkY(g~ z7ovd@&q9=o-Ylbk1D0+3BhM;v^CU)zi$!nKSIy3HwTPw_cdEfnl-MNFDte3B=QlLe zH8%#gLG1%0=}Dp17}0S{yiSQz>T=Vx-4iTX^ll}&%)3^%x`WppnO)&l8IfPbm1_MT zID(ri@vT@ixLWka>G|NbMrN6GWf7UB=e1Oyc42+}{NOf9e3Jb6(L0o}>C<*_3nFtV z%nBoNO1eskOVV1AIyK+`w9k1_mH$s6ubL{iGXLM;XV=Hc``5!l_$<6h*8g|73$BC< z;7Cx}|2EK}3QmOW<owNWBecLt@Jq7(J+KUxLN$zqtz><5&i|L;bFiJ9{}9{>D`7F5 z056mC*FzR6;Qz?@zlKe48(adXz;<%}-$Nei;4C--UMJ^ofNq!pACmE(f}7!TXoqRA zo1Fg${0%h23GfCP|9MykS3v`)-2Vca-2qCwE~VecEvhFBvJ?{W;*`Byn>t-^UYe>f zz*e0b<`Bcuf>S8fJNLxkq1^=~<XZhSEN`TwL#b*a#Ukxd1-DwVdSBAh)eSi`!D*8b zS(db|KH2`ZWcRm!ZiQ^5#Ily8K&|1mYpP7a2zuBjXqTXoGP?wYhS#n(Q*!d;$nRjX zcFIhX)f!&kT5QMDQ_>rgjqKhgZ>!8d!CBVPn^&9>(ArH?yXb)JB#Wz+tuT_~T5*-q zmP$-3)<iskcEjtQOv(tlT(Qxz6-Mq|@M=Rk7hG@{jjK-zZbuo>aqg2ePSnmY#)%3J zqj91s)PvbLmH&y|iImE(<o{+r!>W4%{usUqUxl+l-T(ie;OnpmG*DvzuaNm4hE;G5 z90NZm^WO<JEQT}SP<V>W|4mo|pNAc+T+RP)fIeu0li@Wo|3+92bK!XS1G)cS;BGLX z8g`NSpMW*66z0LFVF#K25%?Ok!m02UxnEuHzYdmxn*aYS{EFQF51_^YE`*cd5O{&? z|3kPO=D;}k1-btoSO!a>0>;2|=(h%xj=Iuv&!XHPWZ9F&gLgdY-6cEFw4?WCr8DP# z0Pnw9Y|8c`9VhOuT5EgK!RP3Eqkm&}nUDhxeW67B1pNwbm}Hl^$Ee_P2aiQPrYku4 zNp+B+`6FMC-<jsMwzg;uX3EhW!LW?PA5~FYFm`D0Z$(jCg746xWO9+Ai%u_+*we-> z!|l8qqUwnDXlNtEp}SM@`(Q%ID>2P48=~rnHf;#EgAeUk|GKn#!|nF$T<|ePVpo4n zamf+w)&76iL1?S`zoS44(F-JI^;Zp1G<v)0%}z#$v#o8xo1Z!`4EX35a#CbyA1*?! z%s+k=?}p#nR@?toTfvQ#*cL*w=nX5(0X9o-suh7>LpzM^pK<a=O8ipQM2hX7TPpt( z;S(Jyvku7rPm}j=febXl383!(_e1D~d2lRjBkSJ>YeAg@pz{A8$@x#f&G02S6LyjH ze+3V~4bTk7!LP~s_rVI72b1ALvi^U<9q<*H3?Gp7)pz}^h1IYSDnVWI{}Z?l7J&x8 zBkR8i8{s-w2%m=EkoE6}Rj>f2!V&N~S^r+J;9U3={G6<R7u*2qdjDB)C~PL{-v(cS zO87mxy#bGay5?W$Xu#f!+WQQ$_&vSHwMgaBbZ=>`{o7gwX@mMvVOqRQQ+TazX7X;% zj4VimDj3`Jao>TdwmrWOEz!Gd`!ub^%o%PpJ*iMpSTD9IoJ|Y#oNh&E(}GWA*0fCb zcz#z(X7f%&xvo%ASTF8M!mi{8@OC4mwKzs}U(wlLm$Krjv1Q>bBl@3jb()eGBuNPU zhJ`2^`xVZwxrUMHFuZh^WWr7Em?}G0S~m79oM(+j#^ghW<MD-f5q%)+?`MU=E#krl zyKgH0Pav!ou3IPnxA?*JXXO30@MZV{d_dNJ4IYBEpu_2~lf3^^xB)JPI`|9tfXx3m zsC)mn!>RBVd4ChEgI;Kb&%iIp{C@{7%z<NI7n%P#xEJy;6UM++GXM9$f_6~X0*rwd z$o;oNH%x_5u!YS35Uc?UX2U4hO6I>Ex?nn-3hJK!&%;_+0%yVw^8RCRBlN<2I0N1y z_x}K{h6QjU{D#cG1=fSo@@fz+D#es6qMe9gnTFsJoP&J}ENOc2>ClE;Lrv}b5t1XQ z`YO%3teubQ?m?<zOFos%r-H*y?2~X0Ki|sqMLYGbDvrRZ_fQ-8f$vW}=5oPVjgnoR ztWw;8gdNE1H9fv*Ie3>O@kU%Mwnu1Bhn_Z4wk^19UE+_tT5OQeAb;dV<+s{AUsxVd zPYP2n_DbkglfKMwv?YcY(V<9IDYi#wPs+49Bb)ma7mMxLV~@)JMCC+U<<$ZCUtRxy z4Yb3l@D^Er6RZOlTHtt4-~G1%)P4WYg|V=ctp6<B16fe_{u>3)ko9i`1Daqw{E@8x z1bhP)g9g7N=RX4Kp8v&r0G!1CUm@q;56hq)#(}yIz*cwwR>67j=b-NWcQ>f}09*<) zK%E2d23cR-2Ve!%!m+T8tiKu7fI0`D0@U~YJr38wVo<sN7}!SE-vlcA&xI4<muR;U z)c5>lKxtXHp28mhC`Q@=kMS0`XA2yl%E12?&yC;&a7`R^Bp-<U6M5^TS5U-O39ZVS z%Z!X|J6foLZHZ;Y%ApF!z6pKnv<0Jx5}QPYVq1i^%+tB;s~$~Am)45S5t`%bS;OtH zowT7jIx|hy^k|F{7mJ+|I;D3xMu#>>{p)g-Vl*~|C>I+gG%9J>jrI!7u{#YXvPdYe v7W*XhiK~6wKCL#dhLAh55ncRPC>o+>?3U223ktKunmS8tu9^MlsQ>>1hQOm% literal 0 HcmV?d00001 diff --git a/src/lib.rs b/src/lib.rs index fdfd91b..dd709c3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -21,6 +21,8 @@ //! ``` //! +#[macro_use] +extern crate lazy_static; extern crate regex; pub mod woothee; diff --git a/src/parser.rs b/src/parser.rs index 310b1ea..6129dee 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -47,6 +47,10 @@ static RE_OSX_IPHONE_OS_VERSION: &'static str = r"; CPU(?: iPhone)? OS (\d+_\d+(?:_\d+)?) like Mac OS X"; static RE_OSX_OS_VERSION: &'static str = r"Mac OS X (10[._]\d+(?:[._]\d+)?)(?:\)|;)"; +lazy_static! { + static ref RX_XXX: Regex = Regex::new(RX_CHROME_PATTERN).unwrap(); +} + #[derive(Debug, Default)] pub struct WootheeResult { pub name: String, @@ -521,8 +525,7 @@ impl Parser { return false; } - let re_chrome = Regex::new(RX_CHROME_PATTERN).unwrap(); - if re_chrome.is_match(agent) { + if RX_XXX.is_match(agent) { let re_opera3 = Regex::new(RX_OPERA_VERSION_PATTERN3).unwrap(); if re_opera3.is_match(agent) { let version = match re_opera3.captures(agent) { @@ -542,7 +545,7 @@ impl Parser { return false; } - let version = match re_chrome.captures(agent) { + let version = match RX_XXX.captures(agent) { Some(caps) => caps.at(1).unwrap(), None => "", }; From 9e5a69f354ee262f8eef9f13acc2ed29d75e2027 Mon Sep 17 00:00:00 2001 From: Hideo Hattori <hattori-h@klab.com> Date: Tue, 10 May 2016 15:26:11 +0900 Subject: [PATCH 2/4] use lazy_static macro --- src/lib.rs | 1 + src/parser.rs | 176 ++++++++++++++++++++++++-------------------------- 2 files changed, 85 insertions(+), 92 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index dd709c3..e338ab6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,4 @@ +#![recursion_limit="100"] //! # Woothee //! //! Woothee is a user-agent strings parser. diff --git a/src/parser.rs b/src/parser.rs index 6129dee..6771b9a 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -3,52 +3,48 @@ use regex::Regex; use dataset; use woothee::VALUE_UNKNOWN; -static RX_CHROME_PATTERN: &'static str = r"(?:Chrome|CrMo|CriOS)/([.0-9]+)"; -static RX_DOCOMO_VERSION_PATTERN: &'static str = r#"DoCoMo/[.0-9]+[ /]([^- /;()"']+)"#; -static RX_FIREFOX_PATTERN: &'static str = r"Firefox/([.0-9]+)"; -static RX_FIREFOX_OS_PATTERN: &'static str = - r"^Mozilla/[.0-9]+ \((?:Mobile|Tablet);(?:.*;)? rv:([.0-9]+)\) Gecko/[.0-9]+ Firefox/[.0-9]+$"; -static RX_FIREFOX_IOS_PATTERN: &'static str = r"FxiOS/([.0-9]+)"; -static RX_FOMA_VERSION_PATTERN: &'static str = r"\(([^;)]+);FOMA;"; -static RX_HEADLINE_READER_PATTERN: &'static str = r"(?i)headline-reader"; -static RX_JIG_PATTERN: &'static str = r"jig browser[^;]+; ([^);]+)"; -static RX_KDDI_PATTERN: &'static str = r#"KDDI-([^- /;()"']+)"#; -static RX_MAYBE_RSS_PATTERN: &'static str = r"(?i)rss(?:reader|bar|[-_ /;()]|[ +]*/)"; -static RX_MAYBE_CRAWLER_PATTERN: &'static str = r"(?i)(?:bot|crawler|spider)(?:[-_ ./;@()]|$)"; -static RX_MAYBE_FEED_PARSER_PATTERN: &'static str = r"(?i)(?:feed|web) ?parser"; -static RX_MAYBE_WATCHDOG_PATTERN: &'static str = r"(?i)watch ?dog"; -static RX_MSEDGE_PATTERN: &'static str = r"Edge/([.0-9]+)"; -static RX_MSIE_PATTERN: &'static str = r"MSIE ([.0-9]+);"; -static RX_OPERA_VERSION_PATTERN1: &'static str = r"Version/([.0-9]+)"; -static RX_OPERA_VERSION_PATTERN2: &'static str = r"Opera[/ ]([.0-9]+)"; -static RX_OPERA_VERSION_PATTERN3: &'static str = r"OPR/([.0-9]+)"; -static RX_SAFARI_PATTERN: &'static str = r"Version/([.0-9]+)"; -static RX_SOFTBANK_PATTERN: &'static str = r"(?:SoftBank|Vodafone|J-PHONE)/[.0-9]+/([^ /;()]+)"; -static RX_TRIDENT_PATTERN: &'static str = r"Trident/([.0-9]+);"; -static RX_TRIDENT_VERSION_PATTERN: &'static str = r" rv:([.0-9]+)"; -static RX_IEMOBILE_PATTERN: &'static str = r"IEMobile/([.0-9]+);"; -static RX_WILLCOM_PATTERN: &'static str = r"(?:WILLCOM|DDIPOCKET);[^/]+/([^ /;()]+)"; -static RX_WINDOWS_VERSION_PATTERN: &'static str = r"Windows ([ .a-zA-Z0-9]+)[;\\)]"; -static RX_WIN_PHONE: &'static str = r"^Phone(?: OS)? ([.0-9]+)"; -static RX_WEBVIEW_PATTERN: &'static str = r"iP(hone;|ad;|od) .*like Mac OS X"; -static RX_WEBVIEW_VERSION_PATTERN: &'static str = r"Version/([.0-9]+)"; -static RX_PPC_OS_VERSION: &'static str = r"rv:(\d+\.\d+\.\d+)"; -static RX_FREEBSD_OS_VERSION: &'static str = r"FreeBSD ([^;\)]+);"; -static RX_CHROMEOS_OS_VERSION: &'static str = r"CrOS ([^\)]+)\)"; -static RX_ANDROIDOS_OS_VERSION: &'static str = r"Android[- ](\d+\.\d+(?:\.\d+)?)"; -static RX_PSP_OS_VERSION: &'static str = r"PSP \(PlayStation Portable\); ([.0-9]+)\)"; -static RX_PS3_OS_VERSION: &'static str = r"PLAYSTATION 3;? ([.0-9]+)\)"; -static RX_PSVITA_OS_VERSION: &'static str = r"PlayStation Vita ([.0-9]+)\)"; -static RX_PS4_OS_VERSION: &'static str = r"PlayStation 4 ([.0-9]+)\)"; -static RX_BLACKBERRY10_OS_VERSION: &'static str = r"BB10(?:.+)Version/([.0-9]+) "; -static RX_BLACKBERRY_OS_VERSION: &'static str = r"BlackBerry(?:\d+)/([.0-9]+) "; - -static RE_OSX_IPHONE_OS_VERSION: &'static str = - r"; CPU(?: iPhone)? OS (\d+_\d+(?:_\d+)?) like Mac OS X"; -static RE_OSX_OS_VERSION: &'static str = r"Mac OS X (10[._]\d+(?:[._]\d+)?)(?:\)|;)"; - lazy_static! { - static ref RX_XXX: Regex = Regex::new(RX_CHROME_PATTERN).unwrap(); + static ref RX_CHROME_PATTERN: Regex = Regex::new(r"(?:Chrome|CrMo|CriOS)/([.0-9]+)").unwrap(); + static ref RX_DOCOMO_VERSION_PATTERN: Regex = Regex::new(r#"DoCoMo/[.0-9]+[ /]([^- /;()"']+)"#).unwrap(); + static ref RX_FIREFOX_PATTERN: Regex = Regex::new(r"Firefox/([.0-9]+)").unwrap(); + static ref RX_FIREFOX_OS_PATTERN: Regex = Regex::new(r"^Mozilla/[.0-9]+ \((?:Mobile|Tablet);(?:.*;)? rv:([.0-9]+)\) Gecko/[.0-9]+ Firefox/[.0-9]+$").unwrap(); + static ref RX_FIREFOX_IOS_PATTERN: Regex = Regex::new(r"FxiOS/([.0-9]+)").unwrap(); + static ref RX_FOMA_VERSION_PATTERN: Regex = Regex::new(r"\(([^;)]+);FOMA;").unwrap(); + static ref RX_HEADLINE_READER_PATTERN: Regex = Regex::new(r"(?i)headline-reader").unwrap(); + static ref RX_JIG_PATTERN: Regex = Regex::new(r"jig browser[^;]+; ([^);]+)").unwrap(); + static ref RX_KDDI_PATTERN: Regex = Regex::new(r#"KDDI-([^- /;()"']+)"#).unwrap(); + static ref RX_MAYBE_RSS_PATTERN: Regex = Regex::new(r"(?i)rss(?:reader|bar|[-_ /;()]|[ +]*/)").unwrap(); + static ref RX_MAYBE_CRAWLER_PATTERN: Regex = Regex::new(r"(?i)(?:bot|crawler|spider)(?:[-_ ./;@()]|$)").unwrap(); + static ref RX_MAYBE_FEED_PARSER_PATTERN: Regex = Regex::new(r"(?i)(?:feed|web) ?parser").unwrap(); + static ref RX_MAYBE_WATCHDOG_PATTERN: Regex = Regex::new(r"(?i)watch ?dog").unwrap(); + static ref RX_MSEDGE_PATTERN: Regex = Regex::new(r"Edge/([.0-9]+)").unwrap(); + static ref RX_MSIE_PATTERN: Regex = Regex::new(r"MSIE ([.0-9]+);").unwrap(); + static ref RX_OPERA_VERSION_PATTERN1: Regex = Regex::new(r"Version/([.0-9]+)").unwrap(); + static ref RX_OPERA_VERSION_PATTERN2: Regex = Regex::new(r"Opera[/ ]([.0-9]+)").unwrap(); + static ref RX_OPERA_VERSION_PATTERN3: Regex = Regex::new(r"OPR/([.0-9]+)").unwrap(); + static ref RX_SAFARI_PATTERN: Regex = Regex::new(r"Version/([.0-9]+)").unwrap(); + static ref RX_SOFTBANK_PATTERN: Regex = Regex::new(r"(?:SoftBank|Vodafone|J-PHONE)/[.0-9]+/([^ /;()]+)").unwrap(); + static ref RX_TRIDENT_PATTERN: Regex = Regex::new(r"Trident/([.0-9]+);").unwrap(); + static ref RX_TRIDENT_VERSION_PATTERN: Regex = Regex::new(r" rv:([.0-9]+)").unwrap(); + static ref RX_IEMOBILE_PATTERN: Regex = Regex::new(r"IEMobile/([.0-9]+);").unwrap(); + static ref RX_WILLCOM_PATTERN: Regex = Regex::new(r"(?:WILLCOM|DDIPOCKET);[^/]+/([^ /;()]+)").unwrap(); + static ref RX_WINDOWS_VERSION_PATTERN: Regex = Regex::new(r"Windows ([ .a-zA-Z0-9]+)[;\\)]").unwrap(); + static ref RX_WIN_PHONE: Regex = Regex::new(r"^Phone(?: OS)? ([.0-9]+)").unwrap(); + static ref RX_WEBVIEW_PATTERN: Regex = Regex::new(r"iP(hone;|ad;|od) .*like Mac OS X").unwrap(); + static ref RX_WEBVIEW_VERSION_PATTERN: Regex = Regex::new(r"Version/([.0-9]+)").unwrap(); + static ref RX_PPC_OS_VERSION: Regex = Regex::new(r"rv:(\d+\.\d+\.\d+)").unwrap(); + static ref RX_FREEBSD_OS_VERSION: Regex = Regex::new(r"FreeBSD ([^;\)]+);").unwrap(); + static ref RX_CHROMEOS_OS_VERSION: Regex = Regex::new(r"CrOS ([^\)]+)\)").unwrap(); + static ref RX_ANDROIDOS_OS_VERSION: Regex = Regex::new(r"Android[- ](\d+\.\d+(?:\.\d+)?)").unwrap(); + static ref RX_PSP_OS_VERSION: Regex = Regex::new(r"PSP \(PlayStation Portable\); ([.0-9]+)\)").unwrap(); + static ref RX_PS3_OS_VERSION: Regex = Regex::new(r"PLAYSTATION 3;? ([.0-9]+)\)").unwrap(); + static ref RX_PSVITA_OS_VERSION: Regex = Regex::new(r"PlayStation Vita ([.0-9]+)\)").unwrap(); + static ref RX_PS4_OS_VERSION: Regex = Regex::new(r"PlayStation 4 ([.0-9]+)\)").unwrap(); + static ref RX_BLACKBERRY10_OS_VERSION: Regex = Regex::new(r"BB10(?:.+)Version/([.0-9]+) ").unwrap(); + static ref RX_BLACKBERRY_OS_VERSION: Regex = Regex::new(r"BlackBerry(?:\d+)/([.0-9]+) ").unwrap(); + + static ref RE_OSX_IPHONE_OS_VERSION: Regex = Regex::new(r"; CPU(?: iPhone)? OS (\d+_\d+(?:_\d+)?) like Mac OS X").unwrap(); + static ref RE_OSX_OS_VERSION: Regex = Regex::new(r"Mac OS X (10[._]\d+(?:[._]\d+)?)(?:\)|;)").unwrap(); } #[derive(Debug, Default)] @@ -467,10 +463,10 @@ impl Parser { } let mut version = VALUE_UNKNOWN; - let re_msie_caps = Regex::new(RX_MSIE_PATTERN).unwrap().captures(agent); - let re_trident_caps = Regex::new(RX_TRIDENT_PATTERN).unwrap().captures(agent); - let re_trident_ver_caps = Regex::new(RX_TRIDENT_VERSION_PATTERN).unwrap().captures(agent); - let re_ie_mobile_caps = Regex::new(RX_IEMOBILE_PATTERN).unwrap().captures(agent); + let re_msie_caps = RX_MSIE_PATTERN.captures(agent); + let re_trident_caps = RX_TRIDENT_PATTERN.captures(agent); + let re_trident_ver_caps = RX_TRIDENT_VERSION_PATTERN.captures(agent); + let re_ie_mobile_caps = RX_IEMOBILE_PATTERN.captures(agent); if re_msie_caps.is_some() { version = re_msie_caps.unwrap().at(1).unwrap(); @@ -490,8 +486,7 @@ impl Parser { } fn challenge_ms_edge(&self, agent: &str, result: &mut WootheeResult) -> bool { - let re_msedge = Regex::new(RX_MSEDGE_PATTERN).unwrap(); - if !re_msedge.is_match(agent) { + if !RX_MSEDGE_PATTERN.is_match(agent) { return false; }; @@ -503,8 +498,7 @@ impl Parser { } fn challenge_firefox_ios(&self, agent: &str, result: &mut WootheeResult) -> bool { - let re_firefox_ios = Regex::new(RX_FIREFOX_IOS_PATTERN).unwrap(); - match re_firefox_ios.captures(agent) { + match RX_FIREFOX_IOS_PATTERN.captures(agent) { Some(caps) => { result.version = caps.at(1).unwrap().to_string(); } @@ -525,10 +519,9 @@ impl Parser { return false; } - if RX_XXX.is_match(agent) { - let re_opera3 = Regex::new(RX_OPERA_VERSION_PATTERN3).unwrap(); - if re_opera3.is_match(agent) { - let version = match re_opera3.captures(agent) { + if RX_CHROME_PATTERN.is_match(agent) { + if RX_OPERA_VERSION_PATTERN3.is_match(agent) { + let version = match RX_OPERA_VERSION_PATTERN3.captures(agent) { Some(caps) => caps.at(1).unwrap(), None => "", }; @@ -545,7 +538,7 @@ impl Parser { return false; } - let version = match RX_XXX.captures(agent) { + let version = match RX_CHROME_PATTERN.captures(agent) { Some(caps) => caps.at(1).unwrap(), None => "", }; @@ -555,7 +548,7 @@ impl Parser { return true; } - let version = match Regex::new(RX_SAFARI_PATTERN).unwrap().captures(agent) { + let version = match RX_SAFARI_PATTERN.captures(agent) { Some(caps) => caps.at(1).unwrap(), None => VALUE_UNKNOWN, }; @@ -574,7 +567,7 @@ impl Parser { return false; } - let version = match Regex::new(RX_FIREFOX_PATTERN).unwrap().captures(agent) { + let version = match RX_FIREFOX_PATTERN.captures(agent) { Some(caps) => caps.at(1).unwrap(), None => VALUE_UNKNOWN, }; @@ -593,10 +586,10 @@ impl Parser { return false; } - let version = match Regex::new(RX_OPERA_VERSION_PATTERN1).unwrap().captures(agent) { + let version = match RX_OPERA_VERSION_PATTERN1.captures(agent) { Some(caps) => caps.at(1).unwrap(), None => { - match Regex::new(RX_OPERA_VERSION_PATTERN2).unwrap().captures(agent) { + match RX_OPERA_VERSION_PATTERN2.captures(agent) { Some(caps2) => caps2.at(1).unwrap(), None => VALUE_UNKNOWN, } @@ -613,7 +606,7 @@ impl Parser { } fn challenge_webview(&self, agent: &str, result: &mut WootheeResult) -> bool { - if !Regex::new(RX_WEBVIEW_PATTERN).unwrap().is_match(agent) || agent.contains("Safari/") { + if !RX_WEBVIEW_PATTERN.is_match(agent) || agent.contains("Safari/") { return false; } @@ -621,7 +614,7 @@ impl Parser { return false; } - let version = match Regex::new(RX_WEBVIEW_VERSION_PATTERN).unwrap().captures(agent) { + let version = match RX_WEBVIEW_VERSION_PATTERN.captures(agent) { Some(caps) => caps.at(1).unwrap(), None => "", }; @@ -638,8 +631,8 @@ impl Parser { } let mut version = VALUE_UNKNOWN; - let docomo_caps = Regex::new(RX_DOCOMO_VERSION_PATTERN).unwrap().captures(agent); - let foma_caps = Regex::new(RX_FOMA_VERSION_PATTERN).unwrap().captures(agent); + let docomo_caps = RX_DOCOMO_VERSION_PATTERN.captures(agent); + let foma_caps = RX_FOMA_VERSION_PATTERN.captures(agent); if docomo_caps.is_some() { version = docomo_caps.unwrap().at(1).unwrap(); } else if foma_caps.is_some() { @@ -660,7 +653,7 @@ impl Parser { } let mut version = VALUE_UNKNOWN; - let caps = Regex::new(RX_KDDI_PATTERN).unwrap().captures(agent); + let caps = RX_KDDI_PATTERN.captures(agent); if caps.is_some() { version = caps.unwrap().at(1).unwrap(); } @@ -680,7 +673,7 @@ impl Parser { } let mut version = VALUE_UNKNOWN; - let caps = Regex::new(RX_SOFTBANK_PATTERN).unwrap().captures(agent); + let caps = RX_SOFTBANK_PATTERN.captures(agent); if caps.is_some() { version = caps.unwrap().at(1).unwrap(); } @@ -699,7 +692,7 @@ impl Parser { } let mut version = VALUE_UNKNOWN; - let caps = Regex::new(RX_WILLCOM_PATTERN).unwrap().captures(agent); + let caps = RX_WILLCOM_PATTERN.captures(agent); if caps.is_some() { version = caps.unwrap().at(1).unwrap(); } @@ -718,7 +711,7 @@ impl Parser { return false; } - let caps = Regex::new(RX_JIG_PATTERN).unwrap().captures(agent); + let caps = RX_JIG_PATTERN.captures(agent); if caps.is_some() { result.version = caps.unwrap().at(1).unwrap().to_string(); } @@ -763,25 +756,25 @@ impl Parser { let mut os_version = ""; let d = if agent.contains("PSP (PlayStation Portable)") { - os_version = match Regex::new(RX_PSP_OS_VERSION).unwrap().captures(agent) { + os_version = match RX_PSP_OS_VERSION.captures(agent) { Some(caps) => caps.at(1).unwrap(), None => "", }; self.lookup_dataset("PSP") } else if agent.contains("PlayStation Vita") { - os_version = match Regex::new(RX_PSVITA_OS_VERSION).unwrap().captures(agent) { + os_version = match RX_PSVITA_OS_VERSION.captures(agent) { Some(caps) => caps.at(1).unwrap(), None => "", }; self.lookup_dataset("PSVita") } else if agent.contains("PLAYSTATION 3 ") || agent.contains("PLAYSTATION 3;") { - os_version = match Regex::new(RX_PS3_OS_VERSION).unwrap().captures(agent) { + os_version = match RX_PS3_OS_VERSION.captures(agent) { Some(caps) => caps.at(1).unwrap(), None => "", }; self.lookup_dataset("PS3") } else if agent.contains("PlayStation 4 ") { - os_version = match Regex::new(RX_PS4_OS_VERSION).unwrap().captures(agent) { + os_version = match RX_PS4_OS_VERSION.captures(agent) { Some(caps) => caps.at(1).unwrap(), None => "", }; @@ -850,7 +843,7 @@ impl Parser { } let mut win = w.unwrap(); - let caps = Regex::new(RX_WINDOWS_VERSION_PATTERN).unwrap().captures(agent); + let caps = RX_WINDOWS_VERSION_PATTERN.captures(agent); if caps.is_none() { result.category = win.category.clone(); result.os = win.name.clone(); @@ -871,7 +864,7 @@ impl Parser { "95" => self.lookup_dataset("Win95"), "CE" => self.lookup_dataset("WinCE"), _ => { - let caps = Regex::new(RX_WIN_PHONE).unwrap().captures(version); + let caps = RX_WIN_PHONE.captures(version); if caps.is_some() { version = caps.unwrap().at(1).unwrap(); self.lookup_dataset("WinPhone") @@ -920,13 +913,13 @@ impl Parser { } data = d.unwrap(); - let caps = Regex::new(RE_OSX_IPHONE_OS_VERSION).unwrap().captures(agent); + let caps = RE_OSX_IPHONE_OS_VERSION.captures(agent); if caps.is_some() { let v = caps.unwrap().at(1).unwrap(); version = v.replace("_", "."); } } else { - let caps = Regex::new(RE_OSX_OS_VERSION).unwrap().captures(agent); + let caps = RE_OSX_OS_VERSION.captures(agent); if caps.is_some() { let v = caps.unwrap().at(1).unwrap(); version = v.replace("_", "."); @@ -949,7 +942,7 @@ impl Parser { let mut os_version = String::new(); let d = if agent.contains("Android") { - let caps = Regex::new(RX_ANDROIDOS_OS_VERSION).unwrap().captures(agent); + let caps = RX_ANDROIDOS_OS_VERSION.captures(agent); if caps.is_some() { os_version = caps.unwrap().at(1).unwrap().to_string(); } @@ -986,14 +979,14 @@ impl Parser { } else if agent.contains("CFNetwork") { self.lookup_dataset("iOS") } else if agent.contains("BB10") { - let caps = Regex::new(RX_BLACKBERRY10_OS_VERSION).unwrap().captures(agent); + let caps = RX_BLACKBERRY10_OS_VERSION.captures(agent); if caps.is_some() { os_version = caps.unwrap().at(1).unwrap(); } result.version = VALUE_UNKNOWN.to_string(); self.lookup_dataset("BlackBerry10") } else if agent.contains("BlackBerry") { - let caps = Regex::new(RX_BLACKBERRY_OS_VERSION).unwrap().captures(agent); + let caps = RX_BLACKBERRY_OS_VERSION.captures(agent); if caps.is_some() { os_version = caps.unwrap().at(1).unwrap(); } @@ -1012,7 +1005,7 @@ impl Parser { // Firefox OS specific pattern // http://lawrencemandel.com/2012/07/27/decision-made-firefox-os-user-agent-string/ // https://github.com/woothee/woothee/issues/2 - let caps = Regex::new(RX_FIREFOX_OS_PATTERN).unwrap().captures(agent); + let caps = RX_FIREFOX_OS_PATTERN.captures(agent); if caps.is_some() { let c = caps.unwrap(); if c.len() > 1 { @@ -1038,7 +1031,7 @@ impl Parser { fn challenge_mobilephone(&self, agent: &str, result: &mut WootheeResult) -> bool { if agent.contains("KDDI-") { - let caps = Regex::new(RX_KDDI_PATTERN).unwrap().captures(agent); + let caps = RX_KDDI_PATTERN.captures(agent); if caps.is_some() { let term = caps.unwrap().at(1).unwrap(); let d = self.lookup_dataset("au"); @@ -1054,7 +1047,7 @@ impl Parser { } if agent.contains("WILLCOM") || agent.contains("DDIPOCKET") { - let caps = Regex::new(RX_WILLCOM_PATTERN).unwrap().captures(agent); + let caps = RX_WILLCOM_PATTERN.captures(agent); if caps.is_some() { let term = caps.unwrap().at(1).unwrap(); let d = self.lookup_dataset("willcom"); @@ -1197,8 +1190,7 @@ impl Parser { } fn challenge_maybe_rss_reader(&self, agent: &str, result: &mut WootheeResult) -> bool { - if Regex::new(RX_MAYBE_RSS_PATTERN).unwrap().is_match(agent) || - Regex::new(RX_HEADLINE_READER_PATTERN).unwrap().is_match(agent) || + if RX_MAYBE_RSS_PATTERN.is_match(agent) || RX_HEADLINE_READER_PATTERN.is_match(agent) || agent.contains("cococ/") { return self.populate_dataset(result, "VariousRSSReader"); } @@ -1207,12 +1199,12 @@ impl Parser { } fn challenge_maybe_crawler(&self, agent: &str, result: &mut WootheeResult) -> bool { - if Regex::new(RX_MAYBE_CRAWLER_PATTERN).unwrap().is_match(agent) || + if RX_MAYBE_CRAWLER_PATTERN.is_match(agent) || Regex::new(r"(?:Rome Client |UnwindFetchor/|ia_archiver |Summify |PostRank/)") .unwrap() .is_match(agent) || agent.contains("ASP-Ranker Feed Crawler") || - Regex::new(RX_MAYBE_FEED_PARSER_PATTERN).unwrap().is_match(agent) || - Regex::new(RX_MAYBE_WATCHDOG_PATTERN).unwrap().is_match(agent) { + RX_MAYBE_FEED_PARSER_PATTERN.is_match(agent) || + RX_MAYBE_WATCHDOG_PATTERN.is_match(agent) { return self.populate_dataset(result, "VariousCrawler"); } @@ -1250,19 +1242,19 @@ impl Parser { result.os_version = "98".to_string(); self.lookup_dataset("Win98") } else if agent.contains("Macintosh; U; PPC;") || agent.contains("Mac_PowerPC") { - let caps = Regex::new(RX_PPC_OS_VERSION).unwrap().captures(agent); + let caps = RX_PPC_OS_VERSION.captures(agent); if caps.is_some() { result.os_version = caps.unwrap().at(1).unwrap().to_string(); } self.lookup_dataset("MacOS") } else if agent.contains("X11; FreeBSD ") { - let caps = Regex::new(RX_FREEBSD_OS_VERSION).unwrap().captures(agent); + let caps = RX_FREEBSD_OS_VERSION.captures(agent); if caps.is_some() { result.os_version = caps.unwrap().at(1).unwrap().to_string(); } self.lookup_dataset("BSD") } else if agent.contains("X11; CrOS ") { - let caps = Regex::new(RX_CHROMEOS_OS_VERSION).unwrap().captures(agent); + let caps = RX_CHROMEOS_OS_VERSION.captures(agent); if caps.is_some() { result.os_version = caps.unwrap().at(1).unwrap().to_string(); } From ceae5ae4f04e22db1e6dacac6714b376032a173f Mon Sep 17 00:00:00 2001 From: Hideo Hattori <hattori-h@klab.com> Date: Tue, 10 May 2016 15:26:21 +0900 Subject: [PATCH 3/4] ignore vim file --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index a9d37c5..d4f917d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ target Cargo.lock +*.swp From c28f5ba1f1f3958bf00fca03aceb0c69f0513934 Mon Sep 17 00:00:00 2001 From: Hideo Hattori <hattori-h@klab.com> Date: Tue, 10 May 2016 15:38:02 +0900 Subject: [PATCH 4/4] more fast --- src/parser.rs | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 6771b9a..807a4b9 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -45,6 +45,11 @@ lazy_static! { static ref RE_OSX_IPHONE_OS_VERSION: Regex = Regex::new(r"; CPU(?: iPhone)? OS (\d+_\d+(?:_\d+)?) like Mac OS X").unwrap(); static ref RE_OSX_OS_VERSION: Regex = Regex::new(r"Mac OS X (10[._]\d+(?:[._]\d+)?)(?:\)|;)").unwrap(); + static ref RX_HTTP_CLIENT: Regex = Regex::new(r"^(?:Apache-HttpClient/|Jakarta Commons-HttpClient/|Java/)").unwrap(); + static ref RX_HTTP_CLIENT_OTHER: Regex = Regex::new(r"[- ]HttpClient(/|$)").unwrap(); + static ref RX_PHP: Regex = Regex::new(r"^(?:PHP|WordPress|CakePHP|PukiWiki|PECL::HTTP)(?:/| |$)").unwrap(); + static ref RX_PEAR: Regex = Regex::new(r"(?:PEAR HTTP_Request|HTTP_Request)(?: class|2)").unwrap(); + static ref RX_MAYBE_CRAWLER_OTHER: Regex = Regex::new(r"(?:Rome Client |UnwindFetchor/|ia_archiver |Summify |PostRank/)").unwrap(); } #[derive(Debug, Default)] @@ -1151,14 +1156,8 @@ impl Parser { fn challenge_http_library(&self, agent: &str, result: &mut WootheeResult) -> bool { // TODO: wip let mut version = ""; - let re_http_client = - Regex::new(r"^(?:Apache-HttpClient/|Jakarta Commons-HttpClient/|Java/)").unwrap(); - let re_http_client_other = Regex::new(r"[- ]HttpClient(/|$)").unwrap(); - let re_php = Regex::new(r"^(?:PHP|WordPress|CakePHP|PukiWiki|PECL::HTTP)(?:/| |$)") - .unwrap(); - let re_pear = Regex::new(r"(?:PEAR HTTP_Request|HTTP_Request)(?: class|2)").unwrap(); - - if re_http_client.is_match(agent) || re_http_client_other.is_match(agent) { + + if RX_HTTP_CLIENT.is_match(agent) || RX_HTTP_CLIENT_OTHER.is_match(agent) { version = "Java"; } else if agent.contains("Java(TM) 2 Runtime Environment,") { version = "Java"; @@ -1173,7 +1172,7 @@ impl Parser { version = "ruby" } else if agent.starts_with("Python-urllib/") || agent.starts_with("Twisted ") { version = "python"; - } else if re_php.is_match(agent) || re_pear.is_match(agent) { + } else if RX_PHP.is_match(agent) || RX_PEAR.is_match(agent) { version = "php"; } @@ -1199,10 +1198,8 @@ impl Parser { } fn challenge_maybe_crawler(&self, agent: &str, result: &mut WootheeResult) -> bool { - if RX_MAYBE_CRAWLER_PATTERN.is_match(agent) || - Regex::new(r"(?:Rome Client |UnwindFetchor/|ia_archiver |Summify |PostRank/)") - .unwrap() - .is_match(agent) || agent.contains("ASP-Ranker Feed Crawler") || + if RX_MAYBE_CRAWLER_PATTERN.is_match(agent) || RX_MAYBE_CRAWLER_OTHER.is_match(agent) || + agent.contains("ASP-Ranker Feed Crawler") || RX_MAYBE_FEED_PARSER_PATTERN.is_match(agent) || RX_MAYBE_WATCHDOG_PATTERN.is_match(agent) { return self.populate_dataset(result, "VariousCrawler");