1 /**
2  * Copyright:
3  * (C) 2016 Martin Brzenska
4  *
5  * License: 
6  * Distributed under the terms of the MIT license. 
7  * Consult the provided LICENSE.md file for details
8  */
9 module libdominator.Filter;
10 
11 import std.regex : StaticRegex , ctRegex , matchAll , matchFirst;
12 import std.string : chompPrefix , chomp , strip;
13 
14 import libdominator;
15 
/**
* Matches one segment of a filter expression.
* Capture 1 is the tag name (word characters, digits or `*`),
* capture 2 is the optional bracketed pick part (including the brackets),
* capture 3 is the optional text between `{` and `}`.
*/
auto rDomFilterExpression = ctRegex!(`([\w\d*]+)(\[(?:[,]?[\d]+|[\d]\.\.[\d$])+\])?(?:\{([^\}]+)\})?`);

/// Matches a range pick `start..end`; capture 1 is the start, capture 2 the end (digits and/or `$`).
auto rRangePicks = ctRegex!(`([\d]+)\.\.([\d$]+)`);

/// Matches a single run of digits, used to walk through the items of a list pick.
auto rListPicks = ctRegex!(`[\d]+`);

/// Matches one `key:value` pair; capture 1 is the text before the colon, capture 2 the text up to the next comma.
auto rAttribExpression = ctRegex!(`([^:]+):([^,]+)*[,]?`);
20 
21 
/// Tells how the `picks` of a TagElement are to be interpreted.
enum FilterPicktype
{
    /// `picks` holds individual pick values (this is the default).
    list,
    /// `picks` holds a start value followed by an end value.
    range
}
23 
/**
* Use this to filter html.
*
* A DomFilter holds the TagElements parsed from one or more filter
* expressions (e.g. `div.a{id:myID}.p[1..$]{class:MyClass}`) together with a
* cursor (`i`) that points at the current TagElement.
*/
struct DomFilter {
    import std.conv : to;
    import std.array : split;

    /// The parsed segments, in the order in which they appeared in the expression(s).
    TagElement[] elements;
    /// Cursor into `elements`; advanced by `next()`.
    size_t i;

    /**
    * A dominator specific array of filter expressions.
    * The TagElements of all expressions are appended in order.
    */
    this(string[] expressions)
    {
        foreach(string expression ; expressions)
        {
            this.addExpression(expression);
        }
    }

    /**
    * A dominator specific filter expression
    */
    this(string expression)
    {
        this.addExpression(expression);
    }

    /**
    * Parses the expression and appends one TagElement per matched segment.
    * Throws:
    *   a std.conv exception if a pick value can not be converted to ushort.
    */
    private void addExpression(string expression)
    {
        foreach(capt ; matchAll(expression, rDomFilterExpression) ) {
            TagElement tagElement;
            tagElement.name = capt[1];

            // Group 2 is the optional "[...]" part; it is empty when no picks were given.
            string pickExpression = capt[2];
            if (pickExpression.length)
            {
                auto rangeCapt = matchFirst(pickExpression, rRangePicks);
                if (!rangeCapt.empty)
                {
                    tagElement.picktype = FilterPicktype.range;
                    tagElement.picks ~= to!ushort(rangeCapt[1]);
                    // An open end ("$") is stored as 0.
                    ushort rangeEnd = 0;
                    if (rangeCapt[2] != "$")
                    {
                        rangeEnd = to!ushort(rangeCapt[2]);
                    }
                    tagElement.picks ~= rangeEnd;
                }
                else
                {
                    tagElement.picktype = FilterPicktype.list;
                    foreach (mItem; matchAll(pickExpression, rListPicks))
                    {
                        // picks is ushort[], so parse as ushort to accept its full value range.
                        tagElement.picks ~= to!ushort(mItem.hit());
                    }
                }
            }

            // Group 3 is the optional text between "{" and "}".
            string attribExpression = capt[3];
            if (attribExpression.length)
            {
                tagElement.attribs = parseAttributexpression(attribExpression);
            }
            this.elements ~= tagElement;
        }
    }

    /// Removes at most one leading and at most one trailing quote character (`"` or `'`).
    private static string stripQuotes(string text)
    {
        if (text.length && (text[0] == '"' || text[0] == '\''))
        {
            text = text[1 .. $];
        }
        if (text.length && (text[$ - 1] == '"' || text[$ - 1] == '\''))
        {
            text = text[0 .. $ - 1];
        }
        return text;
    }

    /**
    * Parses the attribute filter expression and boxes it into an handy array of Attribute.
    *
    * The expression is a comma separated list of `key:value` pairs. The value
    * part is split on whitespace into individual values. Keys and values are
    * stripped of surrounding whitespace and of one enclosing quote character.
    */
    Attribute[] parseAttributexpression(string expression) {
        Attribute[] attribs;
        foreach (mAttrib; matchAll(expression, rAttribExpression))
        {
            string key = stripQuotes(strip(mAttrib[1]));
            string[] values;
            foreach (v; split(mAttrib[2]))
            {
                values ~= stripQuotes(strip(v));
            }
            attribs ~= Attribute(key, values);
        }
        return attribs;
    }
    unittest {
        auto f = DomFilter();
        assert(f.parseAttributexpression("class:myClass,id:myID") == [Attribute("class", ["myClass"]), Attribute("id", ["myID"])]);
        assert(f.parseAttributexpression("class:myClass") == [Attribute("class", ["myClass"])]);
        assert(f.parseAttributexpression("data-url:http://www.mab-on.net/") == [Attribute("data-url", ["http://www.mab-on.net/"])]);
        // quoted keys/values lose their enclosing quotes
        assert(f.parseAttributexpression("class:'myClass'") == [Attribute("class", ["myClass"])]);
        assert(f.parseAttributexpression("\"class\":\"myClass\"") == [Attribute("class", ["myClass"])]);
    }

    /**
    * Moves the cursor to the next TagElement if exists
    * Returns:
    *   true if the cursor could be moved, otherwise false
    */
    bool next() {
        if( 1 + this.i < this.elements.length ) {
            this.i++;
            return true;
        }
        return false;
    }

    /**
    * The current TagElement, which is under the cursor.
    * If there is no TagElement, then an empty TagElement will be returned.
    */
    TagElement front() {
        return this.elements.length ? this.elements[this.i] : TagElement() ;
    }

    ///The number of following TagElements after the current TagElement
    size_t followers() {
        return this.elements.length == 0 ? 0 : this.elements.length - 1 - this.i;
    }

    /**
    * opApply on TagElements.
    * Visits all TagElements from the first one on (independent of the cursor)
    * and stops as soon as the delegate returns a non-zero value.
    */
    int opApply(int delegate(ref TagElement) dg)
    {
        foreach (ref element; this.elements)
        {
            if (auto result = dg(element))
            {
                return result;
            }
        }
        return 0;
    }

    /**
    * Checks if there are any TagElements.
    * in other words: Checks if the DomFilter is loaded with some filterarguments or not.
    */
    bool empty() { return this.elements.length == 0; }

    unittest {
        DomFilter filter;
        assert(filter.empty == true);

        filter = DomFilter("p");
        assert(filter.elements == [TagElement(FilterPicktype.list, [], "p", [])]);

        filter = DomFilter("p[1,2]");
        assert(filter.elements == [TagElement(FilterPicktype.list, [1, 2], "p", [])]);

        filter = DomFilter("p[1..2]");
        assert(filter.elements == [TagElement(FilterPicktype.range, [1, 2], "p", [])]);

        filter = DomFilter("p[1]{class:MyClass}");
        assert(filter.elements == [TagElement(FilterPicktype.list, [1], "p", [Attribute("class", ["MyClass"])])]);

        // pick values above short.max still fit into ushort
        filter = DomFilter("p[40000]");
        assert(filter.elements == [TagElement(FilterPicktype.list, [40000], "p", [])]);

        filter = DomFilter("div.*.p[1..$]{class:MyClass}");
        assert(filter.elements == [
            TagElement(FilterPicktype.list, [], "div", []), 
            TagElement(FilterPicktype.list, [], "*", []), 
            TagElement(FilterPicktype.range, [1, 0], "p", [Attribute("class", ["MyClass"])])
        ]);

        filter = DomFilter("div.a{id:myID}.p[1..$]{class:MyClass}");
        assert(filter.elements == [
            TagElement(FilterPicktype.list, [], "div", []), 
            TagElement(FilterPicktype.list, [], "a", [Attribute("id", ["myID"])]), 
            TagElement(FilterPicktype.range, [1, 0], "p", [Attribute("class", ["MyClass"])])
        ]);
    }
}
/**
* The TagElement is the struct for the atomic part of a filter expression.
* It bundles the tag name, the optional picks and the optional attribute filters.
* Examples:
* ---------------
* a[1]{class:someClass}
* ---------------
*/
struct TagElement
{
    /// How `picks` has to be read: as a list of values or as a start/end pair.
    FilterPicktype picktype;
    /// The pick values; empty means "no restriction".
    ushort[] picks;
    /// The tag name of this element.
    string name;
    /// The attribute filters of this element.
    Attribute[] attribs;

    /**
    * Checks if the TagElement matches the given pick.
    * Returns:
    *   true if there are no picks at all, if a range applies to `pick`
    *   or if `pick` is one of the listed values - otherwise false.
    */
    bool has(size_t pick)
    {
        // Without any picks every hit is accepted.
        if (this.picks.length == 0)
        {
            return true;
        }

        if (this.picktype == FilterPicktype.range)
        {
            // An end value of 0 marks an open range that only has a lower bound.
            const openEnded = this.picks[1] == 0;
            if (openEnded && this.picks[0] <= pick)
            {
                return true;
            }
            // isBetween is declared elsewhere; presumably a bounds check - verify its inclusiveness there.
            return isBetween(pick , this.picks[0] , this.picks[1]) ? true : false;
        }

        // List semantics: any listed value equal to the pick is a match.
        foreach (candidate; this.picks)
        {
            if (candidate == pick)
            {
                return true;
            }
        }
        return false;
    }
}
223 
/**
* Filters the given DOM with a single filter expression.
* The expression is wrapped into a one-element array and handed to the
* overload that takes several expressions.
* Returns:
*   the matching nodes
*/
Node[] filterDom(Dominator dom , DomFilter expressions) {
    DomFilter[] singleExpression = [expressions];
    return filterDom(dom , singleExpression);
}
228 
/**
* Filters the given DOM with several filter expressions.
* The nodes are taken from `dom.getNodes()` and passed on to the Node[] based overload.
* Returns:
*   the matching nodes
*/
Node[] filterDom(Dominator dom , DomFilter[] expressions) {
    auto domNodes = dom.getNodes();
    return filterDom(domNodes , expressions);
}
233 
/**
* Filters the given Nodes with several filter expressions.
* Every expression is applied to the same input nodes; the results are
* concatenated in the order of the expressions.
* Returns:
*   the input nodes unchanged if no expression is given, otherwise the collected matches
*/
Node[] filterDom(Node[] nodes , DomFilter[] expressions) {
    if(expressions.length == 0) {
        return nodes;
    }

    Node[] collected;
    for(size_t idx = 0 ; idx < expressions.length ; ++idx) {
        DomFilter current = expressions[idx];
        collected ~= filterDom(nodes , current);
    }
    return collected;
}
243 
/**
* Checks a node against a list of attribute filters.
* Returns:
*   true if the list is empty or if at least one Attribute matches the node
*/
private bool matchesAnyAttribute(Node node , Attribute[] attribs) {
    if( attribs.length == 0 ) { return true; }
    foreach(Attribute attrib ; attribs) {
        if( attrib.matches(node) ) { return true; }
    }
    return false;
}

/**
* Filters the given Nodes and returns the nodes, that matches the given filter expression.
*
* The TagElement under the cursor of `exp` is compared with every node (by
* tag name, `*` matches any tag). If further TagElements follow, the search
* descends into the children of the matching nodes with the cursor moved
* forward; otherwise the matching nodes themselves are collected.
* Attribute filters are alternatives: one matching Attribute is sufficient.
*
* Params:
*   nodes = the nodes to be checked
*   exp = the filter; it is taken by value, so the caller's cursor is not moved
* Returns:
*   `nodes` unchanged if the filter is empty, otherwise the matching nodes
*/
Node[] filterDom(Node[] nodes , DomFilter exp) {
    if(exp.empty) { return nodes; }

    Node[] resultNodes;
    TagElement current = exp.front;
    // True if the current TagElement is the last one of the expression.
    immutable bool isLeaf = exp.followers == 0;
    // Running hit counter that is compared with the picks of the TagElement.
    uint hit;

    foreach(Node node ; nodes) {
        if( current.name != node.getTag() && current.name != "*" ) { continue; }

        if( isLeaf ) {
            // Count a node at most once, no matter how many of the attribute
            // filters match it - otherwise the pick positions would be skewed.
            if( matchesAnyAttribute(node , current.attribs) && current.has(++hit) ) {
                resultNodes ~= node;
            }
        }
        else if( node.hasChildren() && current.has(++hit) ) {
            // NOTE(review): on this path the hit is counted before the attribute
            // check, while the leaf path counts only attribute-matching nodes.
            // Kept as it was - confirm which of the two is intended.
            if( ! matchesAnyAttribute(node , current.attribs) ) { continue; }

            DomFilter cExp = exp;
            cExp.next();
            resultNodes ~= filterDom(node.getChildren() , cExp);
        }
    }
    return resultNodes;
}
288 
/**
 Drops every Node for which `isComment()` reports true and keeps the
 remaining Nodes in their original order.
 Returns:
  Node[]
*/
Node[] filterComments(Node[] nodes) {
    Node[] kept;
    foreach(candidate ; nodes) {
        if(candidate.isComment()) {
            continue;
        }
        kept ~= candidate;
    }
    return kept;
}
303 
/**
 Same as the Node[] based filterComments, applied to the nodes returned by `dom.getNodes`.
 Returns:
  Node[]
*/
Node[] filterComments(Dominator dom) {
    auto domNodes = dom.getNodes();
    return filterComments(domNodes);
}