/**
 * Copyright:
 * (C) 2016 Martin Brzenska
 *
 * License:
 * Distributed under the terms of the MIT license.
 * Consult the provided LICENSE.md file for details
 */
module libdominator.Filter;

import std.regex : StaticRegex , ctRegex , matchAll , matchFirst;
import std.string : chompPrefix , chomp , strip;

import libdominator;

/// One filter element: tag name, optional `[picks]`, optional `{attributes}`.
auto rDomFilterExpression = ctRegex!(`([\w\d*]+)(\[(?:[,]?[\d]+|[\d]\.\.[\d$])+\])?(?:\{([^\}]+)\})?`);
/// A range pick such as `1..5` or `1..$`.
auto rRangePicks = ctRegex!(`([\d]+)\.\.([\d$]+)`);
/// A single number inside a list pick such as `1,2,3`.
auto rListPicks = ctRegex!(`[\d]+`);
/// One `key:value` pair of an attribute expression, optionally followed by a comma.
auto rAttribExpression = ctRegex!(`([^:]+):([^,]+)*[,]?`);


/// How the numbers stored in `TagElement.picks` have to be interpreted.
enum FilterPicktype { list,range }

/**
 * Use this to filter html.
 *
 * A DomFilter holds the parsed TagElements of one or more filter expressions
 * plus a cursor (`i`) that points to the current TagElement.
 */
struct DomFilter {
    import std.conv : to;
    import std.array : split;

    /// The parsed elements, in the order they appeared in the expression(s).
    TagElement[] elements;
    /// Cursor: index of the current TagElement inside `elements`.
    size_t i;

    /**
     * A dominator specific array of filter expressions.
     * The elements of all expressions are appended in order.
     */
    this(string[] expressions)
    {
        foreach (string expression ; expressions)
        {
            this.addExpression(expression);
        }
    }

    /**
     * A dominator specific filter expression
     */
    this(string expression)
    {
        this.addExpression(expression);
    }

    /**
     * Parses `expression` and appends one TagElement per match of
     * `rDomFilterExpression` to `elements`.
     *
     * Throws: whatever `std.conv.to` throws if a pick is not a valid `ushort`.
     */
    private void addExpression(string expression)
    {
        foreach (capt ; matchAll(expression, rDomFilterExpression))
        {
            TagElement tagElement;

            // Group 1: tag name, group 2: "[...]" picks, group 3: "{...}" body.
            tagElement.name = capt[1];

            string pickExpression = capt[2];
            auto rangeCapt = matchFirst(pickExpression, rRangePicks);
            if (!rangeCapt.empty)
            {
                tagElement.picktype = FilterPicktype.range;
                // picks is ushort[], so parse as ushort (to!short rejected 32768..65535).
                tagElement.picks ~= to!ushort(rangeCapt[1]);
                // "$" marks an open upper bound and is stored as 0.
                ushort upperBound = 0;
                if (rangeCapt[2] != "$")
                {
                    upperBound = to!ushort(rangeCapt[2]);
                }
                tagElement.picks ~= upperBound;
            }
            else
            {
                tagElement.picktype = FilterPicktype.list;
                foreach (mItem; matchAll(pickExpression, rListPicks))
                {
                    tagElement.picks ~= to!ushort(mItem.hit());
                }
            }

            string attribExpression = capt[3];
            if (attribExpression.length)
            {
                tagElement.attribs = parseAttributexpression(attribExpression);
            }
            this.elements ~= tagElement;
        }
    }

    /**
     * Parses the attribute filter expression and boxes it into a handy array of Attribute.
     *
     * Every `key:values` pair becomes one Attribute; `values` is split on whitespace.
     */
    Attribute[] parseAttributexpression(string expression) {
        // NOTE(review): chomp/chompPrefix remove the exact two-character sequence `"'`,
        // not a single leading/trailing quote character. This looks like it was meant
        // to unquote keys and values - confirm the intent before changing it.
        Attribute[] attribs;
        foreach (mAttrib; matchAll(expression, rAttribExpression))
        {
            string key = chompPrefix(chomp(strip(mAttrib[1]), "\"'"), "\"'");
            string[] values;
            foreach (v; split(mAttrib[2]))
            {
                values ~= chompPrefix(chomp(strip(v), "\"'"), "\"'");
            }
            attribs ~= Attribute(key, values);
        }
        return attribs;
    }
    unittest {
        auto f = DomFilter();
        assert(f.parseAttributexpression("class:myClass,id:myID") == [Attribute("class", ["myClass"]), Attribute("id", ["myID"])]);
        assert(f.parseAttributexpression("class:myClass") == [Attribute("class", ["myClass"])]);
        assert(f.parseAttributexpression("data-url:http://www.mab-on.net/") == [Attribute("data-url", ["http://www.mab-on.net/"])]);
    }

    /**
     * Moves the cursor to the next TagElement if exists
     * Returns:
     * true if the cursor could be moved, otherwise false
     */
    bool next() {
        if (1 + this.i < this.elements.length) {
            this.i++;
            return true;
        }
        return false;
    }

    /**
     * The current TagElement, which is under the cursor.
     * If there is no TagElement, then an empty TagElement will be returned.
     */
    TagElement front() {
        return this.elements.length ? this.elements[this.i] : TagElement() ;
    }

    ///The number of following TagElements after the current TagElement
    size_t followers() {
        return this.elements.length == 0 ? 0 : this.elements.length - 1 - this.i;
    }

    /**
     * opApply on TagElements.
     * Iterates over all elements (independent of the cursor) and stops as soon
     * as the delegate returns a non-zero value, which is then returned.
     */
    int opApply(int delegate(ref TagElement) dg)
    {
        // foreach avoids a local index that would shadow the cursor member `i`.
        foreach (ref element; this.elements)
        {
            if (auto result = dg(element))
            {
                return result;
            }
        }
        return 0;
    }

    /**
     * Checks if there are any TagElements.
     * In other words: checks if the DomFilter is loaded with some filter arguments or not.
     */
    bool empty() { return this.elements.length == 0; }

    unittest {
        DomFilter filter;
        assert(filter.empty == true);

        filter = DomFilter("p");
        assert(filter.elements == [TagElement(FilterPicktype.list, [], "p", [])]);

        filter = DomFilter("p[1,2]");
        assert(filter.elements == [TagElement(FilterPicktype.list, [1, 2], "p", [])]);

        filter = DomFilter("p[1..2]");
        assert(filter.elements == [TagElement(FilterPicktype.range, [1, 2], "p", [])]);

        filter = DomFilter("p[1]{class:MyClass}");
        assert(filter.elements == [TagElement(FilterPicktype.list, [1], "p", [Attribute("class", ["MyClass"])])]);

        filter = DomFilter("div.*.p[1..$]{class:MyClass}");
        assert(filter.elements == [
            TagElement(FilterPicktype.list, [], "div", []),
            TagElement(FilterPicktype.list, [], "*", []),
            TagElement(FilterPicktype.range, [1, 0], "p", [Attribute("class", ["MyClass"])])
        ]);

        filter = DomFilter("div.a{id:myID}.p[1..$]{class:MyClass}");
        assert(filter.elements == [
            TagElement(FilterPicktype.list, [], "div", []),
            TagElement(FilterPicktype.list, [], "a", [Attribute("id", ["myID"])]),
            TagElement(FilterPicktype.range, [1, 0], "p", [Attribute("class", ["MyClass"])])
        ]);
    }
}

/**
 * The TagElement is the struct for the atomic part of a filter expression.
 * Examples:
 * ---------------
 * a[1]{class:someClass}
 * ---------------
 */
struct TagElement
{
    /// Whether `picks` is a list of positions or a `[from, to]` range.
    FilterPicktype picktype;
    /// The picked positions; for a range, `picks[1] == 0` stands for an open end (`$`).
    ushort[] picks;
    /// The tag name, or `*`.
    string name;
    /// The attribute filters parsed from the `{...}` part.
    Attribute[] attribs;

    /**
     * Checks if the TagElement matches the given pick.
     *
     * Returns: true if no picks are set, if `pick` is contained in the list,
     * or if `pick` lies in the range (bounds check delegated to `isBetween`).
     */
    bool has(size_t pick)
    {
        if (picks.length == 0)
        {
            return true;
        }
        if (this.picktype == FilterPicktype.range)
        {
            // Open-ended range: everything from the lower bound on matches.
            if (this.picks[1] == 0 && this.picks[0] <= pick) { return true; }
            if (isBetween(pick , this.picks[0] , this.picks[1])) { return true; }
            return false;
        }
        foreach (size_t candidate; picks)
        {
            if (candidate == pick)
            {
                return true;
            }
        }
        return false;
    }
}

///Filters the given DOM and returns the nodes, that matches the given filter expression
Node[] filterDom(Dominator dom , DomFilter expressions) {
    return filterDom(dom,[expressions]);
}
///ditto
Node[] filterDom(Dominator dom , string expressions) {
    return filterDom(dom,[DomFilter(expressions)]);
}

///Filters the given DOM and returns the nodes, that matches the given filter expressions
Node[] filterDom(Dominator dom , DomFilter[] expressions) {
    return dom.getNodes().filterDom(expressions);
}

/**
 * Filters the given nodes and returns the nodes, that matches the given filter expressions.
 * The results of the individual filters are concatenated; with no filters at all,
 * `nodes` is returned unchanged.
 */
Node[] filterDom(Node[] nodes , DomFilter[] expressions) {
    if (expressions.length == 0) { return nodes; }
    Node[] resultNodes;
    foreach (DomFilter exp ; expressions) {
        resultNodes ~= filterDom(nodes , exp);
    }
    return resultNodes;
}

/**
 * Filters the given nodes and returns the nodes, that matches the given filter expression.
 *
 * Only the TagElement under the cursor of `exp` is compared against `nodes`.
 * If it has followers, the children of each matching node are filtered
 * recursively with the cursor advanced by one; otherwise the matching nodes
 * themselves are collected. An empty filter returns `nodes` unchanged.
 */
Node[] filterDom(Node[] nodes , DomFilter exp) {
    import std.uni : icmp;
    if (exp.empty) { return nodes; }

    // Tag names are compared case-insensitively; "*" matches every tag.
    bool tagMatches(ref TagElement element, Node node) {
        return 0 == icmp(element.name , node.getTag()) || element.name == "*";
    }

    // True if the element has no attribute filters or at least one of them matches.
    bool attributesMatch(ref TagElement element, Node node) {
        if (element.attribs.length == 0) { return true; }
        foreach (Attribute attrib ; element.attribs) {
            if (attrib.matches(node)) { return true; }
        }
        return false;
    }

    // Both values are loop-invariant: the cursor of `exp` is never moved here.
    TagElement current = exp.front;
    immutable bool hasFollowers = exp.followers != 0;

    Node[] resultNodes;
    uint hit; // running position of the candidates, checked against the picks
    foreach (Node node ; nodes) {
        if (hasFollowers) {
            if (!node.hasChildren() || !tagMatches(current, node)) { continue; }
            // NOTE(review): here the position is counted before the attributes are
            // checked, while the leaf case below counts only attribute-matching
            // nodes. Kept as-is; confirm which numbering is intended.
            if (!current.has(++hit)) { continue; }
            if (!attributesMatch(current, node)) { continue; }

            DomFilter childFilter = exp;
            childFilter.next();
            resultNodes ~= filterDom(node.getChildren() , childFilter);
        }
        else {
            if (!tagMatches(current, node)) { continue; }
            if (!attributesMatch(current, node)) { continue; }
            // Count each matching node exactly once, no matter how many of the
            // listed attributes it satisfies.
            if (current.has(++hit)) {
                resultNodes ~= node;
            }
        }
    }
    return resultNodes;
}
///ditto
Node[] filterDom(Node[] nodes , string expression) {
    return filterDom(nodes , DomFilter(expression));
}

/**
 * Throws the nodes away for which `isComment()` is true.
 *
 * The given array is left untouched; a new array is returned.
 * Returns:
 * Node[]
 */
Node[] filterComments(Node[] nodes) {
    import std.algorithm.iteration : filter;
    import std.array : array;
    // std.algorithm.mutation.remove would shift elements inside the caller's array.
    return nodes.filter!(n => !n.isComment()).array;
}

/// ditto
Node[] filterComments(Dominator dom) {
    return dom.getNodes.filterComments();
}