Fixed Surfing parser to not parse JavaScript and img files

This commit is contained in:
Ziver Koc 2014-08-01 16:45:30 +02:00
parent fa69c367e5
commit 009a5c536f
2 changed files with 47 additions and 17 deletions

View file

@ -39,23 +39,48 @@ public class UeBehaviourSurfing extends UeBehaviour {
rootUrl = url;
}
/**
 * Entry of the download queue: a URL together with a flag telling the
 * download loop whether the fetched content should be scanned for further
 * resource references.
 *
 * Identity is defined by the URL alone; the parsable flag takes no part in
 * equals() or hashCode(), so a list lookup finds an entry for the same URL
 * regardless of its flag.
 */
private static class URLContainer{
    public URL url;
    public boolean parsable;

    private URLContainer(URL url, boolean parsable) {
        this.parsable = parsable;
        this.url = url;
    }

    /** Hash is delegated to the wrapped URL, consistent with equals(). */
    @Override
    public int hashCode() {
        return url.hashCode();
    }

    /** Two containers are equal when they are the same class and wrap equal URLs. */
    @Override
    public boolean equals(Object o) {
        if (o == this) {
            return true;
        }
        if (o == null) {
            return false;
        }
        if (o.getClass() != getClass()) {
            return false;
        }
        URLContainer other = (URLContainer) o;
        return url.equals(other.url);
    }
}
@Override
protected void execute() throws IOException {
if(!rootUrl.startsWith("http://"))
rootUrl = "http://"+ rootUrl;
List<URL> urlList = new ArrayList<URL>();
urlList.add(new URL(rootUrl));
List<URLContainer> urlList = new ArrayList<URLContainer>();
urlList.add(new URLContainer(new URL(rootUrl), true));
byte[] data = new byte[100];
IOException retException = null;
long totalRead = 0;
for(int i=0; i<urlList.size(); i++) {
try {
URL url = urlList.get(i);
log.debug("Downloading: " + url);
URLContainer cont = urlList.get(i);
log.debug("Downloading: " + cont.url);
URLConnection connection = url.openConnection();
URLConnection connection = cont.url.openConnection();
connection.setUseCaches(false);
connection.connect();
InputStream in = connection.getInputStream();
@ -73,7 +98,8 @@ public class UeBehaviourSurfing extends UeBehaviour {
super.setHandledIncomingData(read);
content.append(new String(data, 0, read));
}
getAdditionalContent(url, content.toString(), urlList);
if(cont.parsable)
getAdditionalContent(cont.url, content.toString(), urlList);
}catch(IOException e){
log.warn(null, e);
if(retException == null) retException = e;
@ -81,32 +107,39 @@ public class UeBehaviourSurfing extends UeBehaviour {
}
estimatedDataLength = totalRead;
log.debug("Total download size: "+ estimatedDataLength +" bytes");
if(retException != null)
throw retException;
}
private static final Pattern[] CONTENT_INCLUSION_PATTERNS = new Pattern[]{
// [\w\/\.-]
private static final Pattern[] PARSABLE_CONTENT_INCLUSION_PATTERNS = new Pattern[]{
// HTML
Pattern.compile("<img.* src=\"(.*?)\"", Pattern.CASE_INSENSITIVE),
Pattern.compile("<iframe.* src=\"(.*?)\"", Pattern.CASE_INSENSITIVE),
// CSS
Pattern.compile("<link.*rel=\"stylesheet\".*href=\"(.*?)\".*>", Pattern.CASE_INSENSITIVE),
Pattern.compile("@import [\"']?(.*?)[\"']?", Pattern.CASE_INSENSITIVE),
Pattern.compile("(?:import|background)[\\W:]*url\\([\"']?(?![\"']?data:)(.*?)[\"']??\\)", Pattern.CASE_INSENSITIVE),
};
// Patterns locating image and script references in markup; capture group 1
// holds the value of the src attribute.
// NOTE(review): presumably these are resources that should be downloaded but
// not scanned for further links -- confirm against getAdditionalContent().
private static final Pattern[] CONTENT_INCLUSION_PATTERNS = new Pattern[]{
// HTML
Pattern.compile("<img.* src=\"(.*?)\"", Pattern.CASE_INSENSITIVE),
// JavaScript
Pattern.compile("<script.* src=\"(.*?)\"", Pattern.CASE_INSENSITIVE)
};
private void getAdditionalContent(URL baseUrl, String data, List<URL> urlList){
for(Pattern pattern : CONTENT_INCLUSION_PATTERNS){
/**
 * Scans downloaded content for references to further resources and appends
 * any not yet queued URL to urlList.
 *
 * Two passes are made: the first uses PARSABLE_CONTENT_INCLUSION_PATTERNS and
 * queues matches with parsable=true, the second uses
 * CONTENT_INCLUSION_PATTERNS and queues matches with parsable=false.
 * The parsable pass runs first on purpose: queue entries are de-duplicated by
 * URL only, so a URL matched by both pattern sets keeps the parsable flag.
 *
 * @param baseUrl URL the content was fetched from; relative references are resolved against it
 * @param data    the downloaded content to scan
 * @param urlList download queue that new entries are appended to
 */
private void getAdditionalContent(URL baseUrl, String data, List<URLContainer> urlList){
    getAdditionalContent(baseUrl, data, urlList, PARSABLE_CONTENT_INCLUSION_PATTERNS, true);
    // Previously this pass reused the parsable pattern set, which made it a
    // no-op (every match was already queued) and left the non-parsable
    // patterns unused.
    getAdditionalContent(baseUrl, data, urlList, CONTENT_INCLUSION_PATTERNS, false);
}
private void getAdditionalContent(URL baseUrl, String data, List<URLContainer> urlList, Pattern[] patternList, boolean parsable){
for(Pattern pattern : patternList){
Matcher m = pattern.matcher(data);
while(m.find()){
try {
String strUrl = m.group(1);
log.debug("Parsing(Regex: "+pattern.pattern()+"): " + strUrl);
URL url = new URL(baseUrl, strUrl);
if(!urlList.contains(url))
urlList.add(url);
URLContainer cont = new URLContainer(new URL(baseUrl, strUrl), parsable);
if(!urlList.contains(cont))
urlList.add(cont);
}catch(MalformedURLException e){
log.warn(null, e);
}

View file

@ -1,16 +1,13 @@
package com.ericsson.uecontrol.gui;
import android.app.ActionBar;
import android.content.Context;
import android.content.Intent;
import android.content.SharedPreferences;
import android.content.SharedPreferences.OnSharedPreferenceChangeListener;
import android.os.Bundle;
import android.os.Environment;
import android.os.Handler;
import android.preference.PreferenceManager;
import android.support.v4.app.FragmentActivity;
import android.util.Log;
import android.view.Menu;
import android.view.MenuItem;
import android.widget.Toast;