Fixed Surfing parser to not parse JavaScript and image files
This commit is contained in:
parent
fa69c367e5
commit
009a5c536f
2 changed files with 47 additions and 17 deletions
|
|
@ -39,23 +39,48 @@ public class UeBehaviourSurfing extends UeBehaviour {
|
|||
rootUrl = url;
|
||||
}
|
||||
|
||||
private static class URLContainer{
|
||||
public URL url;
|
||||
public boolean parsable;
|
||||
|
||||
private URLContainer(URL url, boolean parsable) {
|
||||
this.url = url;
|
||||
this.parsable = parsable;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
if (o == null || getClass() != o.getClass()) return false;
|
||||
|
||||
URLContainer that = (URLContainer) o;
|
||||
if (!url.equals(that.url)) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return url.hashCode();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void execute() throws IOException {
|
||||
if(!rootUrl.startsWith("http://"))
|
||||
rootUrl = "http://"+ rootUrl;
|
||||
|
||||
List<URL> urlList = new ArrayList<URL>();
|
||||
urlList.add(new URL(rootUrl));
|
||||
List<URLContainer> urlList = new ArrayList<URLContainer>();
|
||||
urlList.add(new URLContainer(new URL(rootUrl), true));
|
||||
byte[] data = new byte[100];
|
||||
IOException retException = null;
|
||||
long totalRead = 0;
|
||||
|
||||
for(int i=0; i<urlList.size(); i++) {
|
||||
try {
|
||||
URL url = urlList.get(i);
|
||||
log.debug("Downloading: " + url);
|
||||
URLContainer cont = urlList.get(i);
|
||||
log.debug("Downloading: " + cont.url);
|
||||
|
||||
URLConnection connection = url.openConnection();
|
||||
URLConnection connection = cont.url.openConnection();
|
||||
connection.setUseCaches(false);
|
||||
connection.connect();
|
||||
InputStream in = connection.getInputStream();
|
||||
|
|
@ -73,7 +98,8 @@ public class UeBehaviourSurfing extends UeBehaviour {
|
|||
super.setHandledIncomingData(read);
|
||||
content.append(new String(data, 0, read));
|
||||
}
|
||||
getAdditionalContent(url, content.toString(), urlList);
|
||||
if(cont.parsable)
|
||||
getAdditionalContent(cont.url, content.toString(), urlList);
|
||||
}catch(IOException e){
|
||||
log.warn(null, e);
|
||||
if(retException == null) retException = e;
|
||||
|
|
@ -81,32 +107,39 @@ public class UeBehaviourSurfing extends UeBehaviour {
|
|||
}
|
||||
|
||||
estimatedDataLength = totalRead;
|
||||
log.debug("Total download size: "+ estimatedDataLength +" bytes");
|
||||
if(retException != null)
|
||||
throw retException;
|
||||
}
|
||||
|
||||
private static final Pattern[] CONTENT_INCLUSION_PATTERNS = new Pattern[]{
|
||||
// [\w\/\.-]
|
||||
private static final Pattern[] PARSABLE_CONTENT_INCLUSION_PATTERNS = new Pattern[]{
|
||||
// HTML
|
||||
Pattern.compile("<img.* src=\"(.*?)\"", Pattern.CASE_INSENSITIVE),
|
||||
Pattern.compile("<iframe.* src=\"(.*?)\"", Pattern.CASE_INSENSITIVE),
|
||||
// CSS
|
||||
Pattern.compile("<link.*rel=\"stylesheet\".*href=\"(.*?)\".*>", Pattern.CASE_INSENSITIVE),
|
||||
Pattern.compile("@import [\"']?(.*?)[\"']?", Pattern.CASE_INSENSITIVE),
|
||||
Pattern.compile("(?:import|background)[\\W:]*url\\([\"']?(?![\"']?data:)(.*?)[\"']??\\)", Pattern.CASE_INSENSITIVE),
|
||||
};
|
||||
private static final Pattern[] CONTENT_INCLUSION_PATTERNS = new Pattern[]{
|
||||
// HTML
|
||||
Pattern.compile("<img.* src=\"(.*?)\"", Pattern.CASE_INSENSITIVE),
|
||||
// Javascript
|
||||
Pattern.compile("<script.* src=\"(.*?)\"", Pattern.CASE_INSENSITIVE)
|
||||
};
|
||||
private void getAdditionalContent(URL baseUrl, String data, List<URL> urlList){
|
||||
for(Pattern pattern : CONTENT_INCLUSION_PATTERNS){
|
||||
private void getAdditionalContent(URL baseUrl, String data, List<URLContainer> urlList){
|
||||
getAdditionalContent(baseUrl, data, urlList, PARSABLE_CONTENT_INCLUSION_PATTERNS, true);
|
||||
getAdditionalContent(baseUrl, data, urlList, PARSABLE_CONTENT_INCLUSION_PATTERNS, false);
|
||||
}
|
||||
private void getAdditionalContent(URL baseUrl, String data, List<URLContainer> urlList, Pattern[] patternList, boolean parsable){
|
||||
for(Pattern pattern : patternList){
|
||||
Matcher m = pattern.matcher(data);
|
||||
while(m.find()){
|
||||
try {
|
||||
String strUrl = m.group(1);
|
||||
log.debug("Parsing(Regex: "+pattern.pattern()+"): " + strUrl);
|
||||
URL url = new URL(baseUrl, strUrl);
|
||||
if(!urlList.contains(url))
|
||||
urlList.add(url);
|
||||
URLContainer cont = new URLContainer(new URL(baseUrl, strUrl), parsable);
|
||||
if(!urlList.contains(cont))
|
||||
urlList.add(cont);
|
||||
}catch(MalformedURLException e){
|
||||
log.warn(null, e);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,16 +1,13 @@
|
|||
package com.ericsson.uecontrol.gui;
|
||||
|
||||
import android.app.ActionBar;
|
||||
import android.content.Context;
|
||||
import android.content.Intent;
|
||||
import android.content.SharedPreferences;
|
||||
import android.content.SharedPreferences.OnSharedPreferenceChangeListener;
|
||||
import android.os.Bundle;
|
||||
import android.os.Environment;
|
||||
import android.os.Handler;
|
||||
import android.preference.PreferenceManager;
|
||||
import android.support.v4.app.FragmentActivity;
|
||||
import android.util.Log;
|
||||
import android.view.Menu;
|
||||
import android.view.MenuItem;
|
||||
import android.widget.Toast;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue