Fixed Surfing parser to not parse JavaScript and img files
This commit is contained in:
parent
fa69c367e5
commit
009a5c536f
2 changed files with 47 additions and 17 deletions
|
|
@ -39,23 +39,48 @@ public class UeBehaviourSurfing extends UeBehaviour {
|
||||||
rootUrl = url;
|
rootUrl = url;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static class URLContainer{
|
||||||
|
public URL url;
|
||||||
|
public boolean parsable;
|
||||||
|
|
||||||
|
private URLContainer(URL url, boolean parsable) {
|
||||||
|
this.url = url;
|
||||||
|
this.parsable = parsable;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object o) {
|
||||||
|
if (this == o) return true;
|
||||||
|
if (o == null || getClass() != o.getClass()) return false;
|
||||||
|
|
||||||
|
URLContainer that = (URLContainer) o;
|
||||||
|
if (!url.equals(that.url)) return false;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
return url.hashCode();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void execute() throws IOException {
|
protected void execute() throws IOException {
|
||||||
if(!rootUrl.startsWith("http://"))
|
if(!rootUrl.startsWith("http://"))
|
||||||
rootUrl = "http://"+ rootUrl;
|
rootUrl = "http://"+ rootUrl;
|
||||||
|
|
||||||
List<URL> urlList = new ArrayList<URL>();
|
List<URLContainer> urlList = new ArrayList<URLContainer>();
|
||||||
urlList.add(new URL(rootUrl));
|
urlList.add(new URLContainer(new URL(rootUrl), true));
|
||||||
byte[] data = new byte[100];
|
byte[] data = new byte[100];
|
||||||
IOException retException = null;
|
IOException retException = null;
|
||||||
long totalRead = 0;
|
long totalRead = 0;
|
||||||
|
|
||||||
for(int i=0; i<urlList.size(); i++) {
|
for(int i=0; i<urlList.size(); i++) {
|
||||||
try {
|
try {
|
||||||
URL url = urlList.get(i);
|
URLContainer cont = urlList.get(i);
|
||||||
log.debug("Downloading: " + url);
|
log.debug("Downloading: " + cont.url);
|
||||||
|
|
||||||
URLConnection connection = url.openConnection();
|
URLConnection connection = cont.url.openConnection();
|
||||||
connection.setUseCaches(false);
|
connection.setUseCaches(false);
|
||||||
connection.connect();
|
connection.connect();
|
||||||
InputStream in = connection.getInputStream();
|
InputStream in = connection.getInputStream();
|
||||||
|
|
@ -73,7 +98,8 @@ public class UeBehaviourSurfing extends UeBehaviour {
|
||||||
super.setHandledIncomingData(read);
|
super.setHandledIncomingData(read);
|
||||||
content.append(new String(data, 0, read));
|
content.append(new String(data, 0, read));
|
||||||
}
|
}
|
||||||
getAdditionalContent(url, content.toString(), urlList);
|
if(cont.parsable)
|
||||||
|
getAdditionalContent(cont.url, content.toString(), urlList);
|
||||||
}catch(IOException e){
|
}catch(IOException e){
|
||||||
log.warn(null, e);
|
log.warn(null, e);
|
||||||
if(retException == null) retException = e;
|
if(retException == null) retException = e;
|
||||||
|
|
@ -81,32 +107,39 @@ public class UeBehaviourSurfing extends UeBehaviour {
|
||||||
}
|
}
|
||||||
|
|
||||||
estimatedDataLength = totalRead;
|
estimatedDataLength = totalRead;
|
||||||
|
log.debug("Total download size: "+ estimatedDataLength +" bytes");
|
||||||
if(retException != null)
|
if(retException != null)
|
||||||
throw retException;
|
throw retException;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static final Pattern[] CONTENT_INCLUSION_PATTERNS = new Pattern[]{
|
private static final Pattern[] PARSABLE_CONTENT_INCLUSION_PATTERNS = new Pattern[]{
|
||||||
// [\w\/\.-]
|
|
||||||
// HTML
|
// HTML
|
||||||
Pattern.compile("<img.* src=\"(.*?)\"", Pattern.CASE_INSENSITIVE),
|
|
||||||
Pattern.compile("<iframe.* src=\"(.*?)\"", Pattern.CASE_INSENSITIVE),
|
Pattern.compile("<iframe.* src=\"(.*?)\"", Pattern.CASE_INSENSITIVE),
|
||||||
// CSS
|
// CSS
|
||||||
Pattern.compile("<link.*rel=\"stylesheet\".*href=\"(.*?)\".*>", Pattern.CASE_INSENSITIVE),
|
Pattern.compile("<link.*rel=\"stylesheet\".*href=\"(.*?)\".*>", Pattern.CASE_INSENSITIVE),
|
||||||
Pattern.compile("@import [\"']?(.*?)[\"']?", Pattern.CASE_INSENSITIVE),
|
Pattern.compile("@import [\"']?(.*?)[\"']?", Pattern.CASE_INSENSITIVE),
|
||||||
Pattern.compile("(?:import|background)[\\W:]*url\\([\"']?(?![\"']?data:)(.*?)[\"']??\\)", Pattern.CASE_INSENSITIVE),
|
Pattern.compile("(?:import|background)[\\W:]*url\\([\"']?(?![\"']?data:)(.*?)[\"']??\\)", Pattern.CASE_INSENSITIVE),
|
||||||
|
};
|
||||||
|
private static final Pattern[] CONTENT_INCLUSION_PATTERNS = new Pattern[]{
|
||||||
|
// HTML
|
||||||
|
Pattern.compile("<img.* src=\"(.*?)\"", Pattern.CASE_INSENSITIVE),
|
||||||
// Javascript
|
// Javascript
|
||||||
Pattern.compile("<script.* src=\"(.*?)\"", Pattern.CASE_INSENSITIVE)
|
Pattern.compile("<script.* src=\"(.*?)\"", Pattern.CASE_INSENSITIVE)
|
||||||
};
|
};
|
||||||
private void getAdditionalContent(URL baseUrl, String data, List<URL> urlList){
|
private void getAdditionalContent(URL baseUrl, String data, List<URLContainer> urlList){
|
||||||
for(Pattern pattern : CONTENT_INCLUSION_PATTERNS){
|
getAdditionalContent(baseUrl, data, urlList, PARSABLE_CONTENT_INCLUSION_PATTERNS, true);
|
||||||
|
getAdditionalContent(baseUrl, data, urlList, PARSABLE_CONTENT_INCLUSION_PATTERNS, false);
|
||||||
|
}
|
||||||
|
private void getAdditionalContent(URL baseUrl, String data, List<URLContainer> urlList, Pattern[] patternList, boolean parsable){
|
||||||
|
for(Pattern pattern : patternList){
|
||||||
Matcher m = pattern.matcher(data);
|
Matcher m = pattern.matcher(data);
|
||||||
while(m.find()){
|
while(m.find()){
|
||||||
try {
|
try {
|
||||||
String strUrl = m.group(1);
|
String strUrl = m.group(1);
|
||||||
log.debug("Parsing(Regex: "+pattern.pattern()+"): " + strUrl);
|
log.debug("Parsing(Regex: "+pattern.pattern()+"): " + strUrl);
|
||||||
URL url = new URL(baseUrl, strUrl);
|
URLContainer cont = new URLContainer(new URL(baseUrl, strUrl), parsable);
|
||||||
if(!urlList.contains(url))
|
if(!urlList.contains(cont))
|
||||||
urlList.add(url);
|
urlList.add(cont);
|
||||||
}catch(MalformedURLException e){
|
}catch(MalformedURLException e){
|
||||||
log.warn(null, e);
|
log.warn(null, e);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,16 +1,13 @@
|
||||||
package com.ericsson.uecontrol.gui;
|
package com.ericsson.uecontrol.gui;
|
||||||
|
|
||||||
import android.app.ActionBar;
|
import android.app.ActionBar;
|
||||||
import android.content.Context;
|
|
||||||
import android.content.Intent;
|
import android.content.Intent;
|
||||||
import android.content.SharedPreferences;
|
import android.content.SharedPreferences;
|
||||||
import android.content.SharedPreferences.OnSharedPreferenceChangeListener;
|
import android.content.SharedPreferences.OnSharedPreferenceChangeListener;
|
||||||
import android.os.Bundle;
|
import android.os.Bundle;
|
||||||
import android.os.Environment;
|
|
||||||
import android.os.Handler;
|
import android.os.Handler;
|
||||||
import android.preference.PreferenceManager;
|
import android.preference.PreferenceManager;
|
||||||
import android.support.v4.app.FragmentActivity;
|
import android.support.v4.app.FragmentActivity;
|
||||||
import android.util.Log;
|
|
||||||
import android.view.Menu;
|
import android.view.Menu;
|
||||||
import android.view.MenuItem;
|
import android.view.MenuItem;
|
||||||
import android.widget.Toast;
|
import android.widget.Toast;
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue