1 /**
2  * Copyright:
3  * (C) 2016 Martin Brzenska
4  *
5  * License:
6  * Distributed under the terms of the MIT license.
7  * Consult the provided LICENSE.md file for details
8  */
9 module libdominator.Filter;
10 
11 import std.regex : StaticRegex , ctRegex , matchAll , matchFirst;
12 import std.string : chompPrefix , chomp , strip;
13 
14 import libdominator;
15 
/// Matches one tag part of a filter expression.
/// Capture 1: the tag name (word characters, digits or `*`).
/// Capture 2: the optional bracketed pick part, e.g. `[1,2]` or `[1..$]`.
/// Capture 3: the optional attribute text found between `{` and `}`.
auto rDomFilterExpression = ctRegex!`([\w\d*]+)(\[(?:[,]?[\d]+|[\d]\.\.[\d$])+\])?(?:\{([^\}]+)\})?`;

/// Matches a range pick such as `1..5` or `1..$`; capture 1 is the start, capture 2 the end.
auto rRangePicks = ctRegex!`([\d]+)\.\.([\d$]+)`;

/// Matches a single run of digits; used to enumerate the entries of a list pick.
auto rListPicks = ctRegex!`[\d]+`;

/// Matches one `key:value` pair of an attribute expression, followed by an optional comma.
/// Capture 1 is the key (everything up to the first `:`), capture 2 the value (everything up to the next `,`).
auto rAttribExpression = ctRegex!`([^:]+):([^,]+)*[,]?`;
20 
21 
/// Tells how the picks of a TagElement are to be interpreted.
enum FilterPicktype
{
    /// The picks are individual positions, e.g. `[1,2]`.
    list,
    /// The picks are a start and an end position, e.g. `[1..2]` or `[1..$]`.
    range
}
23 
/**
* Use this to filter html.
*
* A DomFilter holds the parsed form of one or more filter expressions.
* Every tag part of an expression (for example `p[1..$]{class:MyClass}`)
* becomes one TagElement in `elements`. The filter additionally keeps a
* cursor (`i`) into that list; `front`, `next` and `followers` work
* relative to the cursor.
*/
struct DomFilter {
    import std.conv : to;
    import std.array : split;

    /// The parsed tag parts of all expressions added so far, in order of appearance.
    TagElement[] elements;
    /// Cursor: index into `elements` of the current TagElement.
    size_t i;

    /**
    * A dominator specific array of filter expressions.
    * The tag parts of all expressions are appended to the same element list.
    *
    * Params:
    *   expressions = the filter expressions to parse
    */
    this(string[] expressions)
    {
        foreach (string expression; expressions)
        {
            this.addExpression(expression);
        }
    }

    /**
    * A dominator specific filter expression.
    *
    * Params:
    *   expression = the filter expression to parse
    */
    this(string expression)
    {
        this.addExpression(expression);
    }

    /**
    * Parses the expression and appends one TagElement per tag part to `elements`.
    *
    * Throws:
    *   std.conv.ConvException if a pick cannot be converted to `ushort`.
    */
    private void addExpression(string expression)
    {
        foreach (capt; matchAll(expression, rDomFilterExpression))
        {
            TagElement tagElement;
            tagElement.name = capt[1];

            // capt[2] is the bracketed pick part; it is empty when the tag has no picks.
            auto rangeCapt = matchFirst(capt[2], rRangePicks);
            if (!rangeCapt.empty)
            {
                tagElement.picktype = FilterPicktype.range;
                // picks is ushort[], so convert to ushort (to!short rejected 32768..65535).
                tagElement.picks ~= to!ushort(rangeCapt[1]);
                // An open end ("$") is stored as 0.
                ushort upper = 0;
                if (rangeCapt[2] != "$")
                {
                    upper = to!ushort(rangeCapt[2]);
                }
                tagElement.picks ~= upper;
            }
            else
            {
                tagElement.picktype = FilterPicktype.list;
                foreach (item; matchAll(capt[2], rListPicks))
                {
                    tagElement.picks ~= to!ushort(item.hit);
                }
            }

            // capt[3] is the text between "{" and "}"; it is empty when absent.
            if (capt[3].length)
            {
                tagElement.attribs = parseAttributexpression(capt[3]);
            }
            this.elements ~= tagElement;
        }
    }

    /// Strips surrounding whitespace and any leading/trailing single or double quotes.
    private static string unquote(string text)
    {
        text = strip(text);
        while (text.length && (text[0] == '"' || text[0] == '\''))
        {
            text = text[1 .. $];
        }
        while (text.length && (text[$ - 1] == '"' || text[$ - 1] == '\''))
        {
            text = text[0 .. $ - 1];
        }
        return text;
    }

    /**
    * Parses the attribute filter expression and boxes it into an handy array of Attribute.
    *
    * The expression is a comma separated list of `key:value` pairs. The value
    * is split on whitespace into several values; keys and values are trimmed
    * and freed from surrounding quotes.
    *
    * Params:
    *   expression = the attribute expression, e.g. `class:myClass,id:myID`
    * Returns:
    *   one Attribute per `key:value` pair
    */
    Attribute[] parseAttributexpression(string expression) {
        Attribute[] attribs;
        foreach (mAttrib; matchAll(expression, rAttribExpression))
        {
            string[] values;
            foreach (v; split(mAttrib[2]))
            {
                values ~= unquote(v);
            }
            attribs ~= Attribute(unquote(mAttrib[1]), values);
        }
        return attribs;
    }
    unittest {
        auto f = DomFilter();
        assert(f.parseAttributexpression("class:myClass,id:myID") == [Attribute("class", ["myClass"]), Attribute("id", ["myID"])]);
        assert(f.parseAttributexpression("class:myClass") == [Attribute("class", ["myClass"])]);
        assert(f.parseAttributexpression("data-url:http://www.mab-on.net/") == [Attribute("data-url", ["http://www.mab-on.net/"])]);
        // quoted values are unquoted
        assert(f.parseAttributexpression("class:\"myClass\"") == [Attribute("class", ["myClass"])]);
        assert(f.parseAttributexpression("class:'myClass'") == [Attribute("class", ["myClass"])]);
    }

    /**
    * Moves the cursor to the next TagElement if exists
    * Returns:
    *   true if the cursor could be moved, otherwise false
    */
    bool next() {
        if (this.i + 1 < this.elements.length) {
            this.i++;
            return true;
        }
        return false;
    }

    /**
    * The current TagElement, which is under the cursor.
    * If there is no TagElement, then an empty TagElement will be returned.
    */
    TagElement front() {
        return this.elements.length ? this.elements[this.i] : TagElement();
    }

    ///The number of following TagElements after the current TagElement
    size_t followers() {
        return this.elements.length == 0 ? 0 : this.elements.length - 1 - this.i;
    }

    ///opApply on TagElements; iterates over all elements regardless of the cursor
    int opApply(int delegate(ref TagElement) dg)
    {
        foreach (ref element; this.elements)
        {
            // A non-zero result from the delegate requests the iteration to stop.
            if (immutable result = dg(element))
            {
                return result;
            }
        }
        return 0;
    }

    /**
    * Checks if there are any TagElements.
    * In other words: checks if the DomFilter is loaded with some filter arguments or not.
    */
    bool empty() { return this.elements.length == 0; }

    unittest {
        DomFilter filter;
        assert(filter.empty == true);

        filter = DomFilter("p");
        assert(filter.elements == [TagElement(FilterPicktype.list, [], "p", [])]);

        filter = DomFilter("p[1,2]");
        assert(filter.elements == [TagElement(FilterPicktype.list, [1, 2], "p", [])]);

        filter = DomFilter("p[1..2]");
        assert(filter.elements == [TagElement(FilterPicktype.range, [1, 2], "p", [])]);

        filter = DomFilter("p[1]{class:MyClass}");
        assert(filter.elements == [TagElement(FilterPicktype.list, [1], "p", [Attribute("class", ["MyClass"])])]);

        filter = DomFilter("div.*.p[1..$]{class:MyClass}");
        assert(filter.elements == [
            TagElement(FilterPicktype.list, [], "div", []),
            TagElement(FilterPicktype.list, [], "*", []),
            TagElement(FilterPicktype.range, [1, 0], "p", [Attribute("class", ["MyClass"])])
        ]);

        filter = DomFilter("div.a{id:myID}.p[1..$]{class:MyClass}");
        assert(filter.elements == [
            TagElement(FilterPicktype.list, [], "div", []),
            TagElement(FilterPicktype.list, [], "a", [Attribute("id", ["myID"])]),
            TagElement(FilterPicktype.range, [1, 0], "p", [Attribute("class", ["MyClass"])])
        ]);

        // picks use the full ushort range
        filter = DomFilter("p[40000]");
        assert(filter.elements == [TagElement(FilterPicktype.list, [40000], "p", [])]);

        // cursor navigation
        filter = DomFilter("div.p");
        assert(filter.followers() == 1);
        assert(filter.front().name == "div");
        assert(filter.next());
        assert(filter.front().name == "p");
        assert(filter.followers() == 0);
        assert(!filter.next());
    }
}
/**
* The TagElement is the struct for the atomic part of a filter expression:
* a tag name together with optional picks and optional attribute constraints.
* Examples:
* ---------------
* a[1]{class:someClass}
* ---------------
*/
struct TagElement
{
    /// Tells whether `picks` is a list of positions or a start/end range.
    FilterPicktype picktype;
    /// The picked positions; for a range, picks[0] is the start and picks[1] the end (0 stands for an open end).
    ushort[] picks;
    /// The tag name.
    string name;
    /// The attribute constraints.
    Attribute[] attribs;

    /**
    * Checks if the TagElement matches the given pick.
    *
    * Params:
    *   pick = the position to test
    * Returns:
    *   true if there are no picks at all, if `pick` is one of the listed
    *   picks, or if `pick` lies inside the picked range; otherwise false
    */
    bool has(size_t pick)
    {
        // Without any picks nothing is excluded.
        if (!picks.length)
        {
            return true;
        }

        if (this.picktype != FilterPicktype.range)
        {
            // List: the pick has to be one of the listed positions.
            foreach (size_t candidate; picks)
            {
                if (candidate == pick)
                {
                    return true;
                }
            }
            return false;
        }

        // Range with an open end (stored as 0): everything from the start on matches.
        if (this.picks[1] == 0 && this.picks[0] <= pick)
        {
            return true;
        }
        if (isBetween(pick , this.picks[0] , this.picks[1]))
        {
            return true;
        }
        return false;
    }
}
223 
/**
* Filters the given DOM and returns the nodes that match the given filter expression.
*
* Params:
*   dom = the DOM to filter
*   expressions = the filter to apply
* Returns:
*   the matching nodes
*/
Node[] filterDom(Dominator dom , DomFilter expressions) {
    DomFilter[] filters = [expressions];
    return filterDom(dom, filters);
}
/**
* Filters the given DOM and returns the nodes that match the given filter expression.
*
* Params:
*   dom = the DOM to filter
*   expressions = the filter expression as text; it is parsed into a DomFilter first
* Returns:
*   the matching nodes
*/
Node[] filterDom(Dominator dom , string expressions) {
    auto parsed = DomFilter(expressions);
    return filterDom(dom, [parsed]);
}
232 
/**
* Filters the given DOM and returns the nodes that match the given filter expressions.
*
* Params:
*   dom = the DOM whose nodes (as returned by `getNodes`) are filtered
*   expressions = the filters to apply
* Returns:
*   the matching nodes
*/
Node[] filterDom(Dominator dom , DomFilter[] expressions) {
    Node[] allNodes = dom.getNodes();
    return filterDom(allNodes, expressions);
}
237 
/**
* Filters the given nodes and returns the nodes that match the given filter expressions.
*
* Every filter is applied to `nodes` on its own; the individual results are
* concatenated in the order of the filters.
*
* Params:
*   nodes = the nodes to filter
*   expressions = the filters to apply; if there are none, `nodes` is returned as is
* Returns:
*   the matching nodes
*/
Node[] filterDom(Node[] nodes , DomFilter[] expressions) {
    if (!expressions.length) {
        return nodes;
    }

    Node[] collected;
    foreach (idx; 0 .. expressions.length) {
        collected ~= filterDom(nodes, expressions[idx]);
    }
    return collected;
}
247 
/**
* Filters the given nodes and returns the nodes that match the given filter expression.
*
* The TagElement under the cursor of `exp` is compared with every node: the
* tag name is compared case-insensitively and "*" accepts every tag. If more
* TagElements follow, the children of a matching node that has children are
* filtered recursively with the cursor moved forward by one. Otherwise the
* matching node itself is added to the result. If the TagElement has
* attribute constraints, a node is accepted as soon as any one of them matches.
*
* Params:
*   nodes = the nodes to filter
*   exp = the filter to apply; if it is empty, `nodes` is returned as is
* Returns:
*   the matching nodes
*/
Node[] filterDom(Node[] nodes , DomFilter exp) {
    import std.uni : icmp;
    if(exp.empty) { return nodes; }

    // Neither the cursor nor the elements of exp change while iterating,
    // so the current element is looked up once.
    TagElement current = exp.front;
    immutable bool hasFollowers = exp.followers != 0;

    Node[] resultNodes;
    uint hit; // running position that is handed to TagElement.has

    // True if there are no attribute constraints or if at least one of them matches.
    bool attribsAccept(Node candidate) {
        if (current.attribs.length == 0) { return true; }
        foreach (Attribute attrib ; current.attribs) {
            if (attrib.matches(candidate)) { return true; }
        }
        return false;
    }

    foreach (Node node ; nodes) {
        // Descending is only possible into nodes which have children.
        if (hasFollowers && !node.hasChildren()) { continue; }
        if (0 != icmp(current.name , node.getTag()) && current.name != "*") { continue; }

        if (hasFollowers) {
            // NOTE(review): here the position is counted before the attributes
            // are checked, while the final element below counts only nodes
            // whose attributes match - confirm that this difference is intended.
            if (!current.has(++hit)) { continue; }
            if (!attribsAccept(node)) { continue; }

            DomFilter cExp = exp;
            cExp.next();
            resultNodes ~= filterDom(node.getChildren() , cExp);
        }
        else {
            if (!attribsAccept(node)) { continue; }
            // Count each accepted node exactly once, no matter how many of
            // its attributes match.
            if (current.has(++hit)) {
                resultNodes ~= node;
            }
        }
    }
    return resultNodes;
}
/**
* Filters the given nodes and returns the nodes that match the given filter expression.
*
* Params:
*   nodes = the nodes to filter
*   expression = the filter expression as text; it is parsed into a DomFilter first
* Returns:
*   the matching nodes
*/
Node[] filterDom(Node[] nodes , string expression) {
    auto parsed = DomFilter(expression);
    return filterDom(nodes, parsed);
}
297 
/**
 Throws away the nodes for which `isComment()` is true.

 The given array is left untouched; the remaining nodes are returned in a
 newly allocated array and keep their relative order.

 Params:
  nodes = the nodes to filter
 Returns:
  Node[] without the comment nodes
*/
Node[] filterComments(Node[] nodes) {
    import std.algorithm.iteration : filter;
    import std.array : array;
    // std.algorithm.mutation.remove would shift elements around inside the
    // caller's array; filtering into a new array avoids that side effect.
    return nodes.filter!(n => !n.isComment()).array;
}
307 
/**
 Throws away the comment nodes of the given DOM.

 The nodes returned by `dom.getNodes` are handed to the `Node[]` overload of
 filterComments.

 Params:
  dom = the DOM whose nodes are filtered
 Returns:
  Node[]
*/
Node[] filterComments(Dominator dom) {
    Node[] allNodes = dom.getNodes;
    return filterComments(allNodes);
}