commit 27c25a8457b4ad616c9ff602e06e5557034066e9
parent ea033c3ea31772f5e5b0f24902c99d0890e86ceb
Author: Brian Swetland <swetland@frotz.net>
Date: Sun, 7 Aug 2011 07:35:16 -0700
XML/XMLSequence: remove use of Pattern and Matcher
On OpenJDK 1.6 on Linux, this is in the noise (listing 1900 tracks
still takes ~5 seconds).
On Android 2.3, this is a huge performance improvement (listing 1900
tracks takes ~6 seconds, whereas with the regex version it took ~110
seconds, due to Matcher.reset() and String.contentEquals() calling
CharSequence.toString() excessively).
Diffstat:
2 files changed, 220 insertions(+), 69 deletions(-)
diff --git a/net/frotz/sonos/XML.java b/net/frotz/sonos/XML.java
@@ -31,15 +31,19 @@ import java.io.PrintStream;
// TODO: ' -> '
public class XML {
+ private static final boolean DEBUG = false;
+
XMLSequence seq; /* entire buffer */
- XMLSequence tag; /* most recent tag */
XMLSequence tmp; /* for content return */
char[] xml;
- int offset;
- int count;
- Matcher mTag;
- Matcher mAttr;
+ /* offset and length of the name of the current tag */
+ int tag_off;
+ int tag_len;
+ /* offset and length of the attr area of the current tag */
+ int att_off;
+ int att_len;
+ /* true if the current tag is <...>, false if </...> */
boolean isOpen;
CharsetDecoder decoder;
@@ -50,10 +54,7 @@ public class XML {
public XML(int size) {
decoder = cs.newDecoder();
seq = new XMLSequence();
- tag = new XMLSequence();
tmp = new XMLSequence();
- mTag = pTag.matcher(seq);
- mAttr = pAttr.matcher(tmp);
xml = new char[size];
buf = CharBuffer.wrap(xml);
}
@@ -73,40 +74,48 @@ public class XML {
}
void reset() {
seq.init(xml, buf.arrayOffset(), buf.length());
- tag.init(xml, 0, 0);
tmp.init(xml, 0, 0);
- offset = 0;
nextTag();
- //System.err.println("XML reset, "+buf.length()+" bytes.");
}
public void rewind() {
- offset = 0;
+ seq.pos = seq.offset;
nextTag();
}
public XMLSequence getAttr(String name) {
- int off = mTag.start(3);
- int end = off + mTag.end(3);
-
- tmp.offset = 0;
- tmp.count = end;
-
- /* hack: work around Android bug */
- mAttr.reset(tmp);
-
- while (mAttr.find(off)) {
- //System.err.println("ANAME: " + mAttr.group(1));
- //System.err.println("ATEXT: " + mAttr.group(2));
- tmp.offset = mAttr.start(1);
- tmp.count = mAttr.end(1) - tmp.offset;
- if (name.contentEquals(tmp)) {
- tmp.offset = mAttr.start(2);
- tmp.count = mAttr.end(2) - tmp.offset;
- return tmp;
- }
- tmp.offset = 0;
- tmp.count = end;
- off = mAttr.end();
+ int nlen = name.length();
+ int n;
+
+ tmp.offset = att_off;
+ tmp.count = att_len;
+ tmp.pos = att_off;
+
+ for (;;) {
+ int off = tmp.space();
+ int len = tmp.name();
+ if (len < 0)
+ break;
+ if (DEBUG) System.err.println("ANAME: ["+new String(tmp.data,off,len)+"]");
+ int voff = tmp.value();
+ if (voff < 0)
+ break;
+ int vend = tmp.next('"');
+ if (vend < 0)
+ break;
+ vend--;
+ if (DEBUG) System.err.println("ATEXT: ["+new String(tmp.data,voff,vend-voff)+"]");
+
+ if (nlen != len)
+ continue;
+ for (n = 0; n < len; n++)
+ if (name.charAt(n) != tmp.data[off+n]) /* XXX yuck */
+ break;
+ if (nlen != n)
+ continue;
+
+ tmp.offset = voff;
+ tmp.count = vend - voff;
+ return tmp;
}
return null;
}
@@ -118,7 +127,7 @@ public class XML {
char[] data = xml;
int n;
tmp.data = data;
- n = tmp.offset = mTag.end();
+ n = tmp.offset = (att_off + att_len);
try {
for (;;) {
if (data[n] == '<')
@@ -173,26 +182,35 @@ public class XML {
return isOpen;
}
+ public boolean tag_eq(CharSequence name) {
+ if (name.length() != tag_len)
+ return false;
+ for (int n = 0; n < tag_len; n++)
+ if (name.charAt(n) != seq.data[tag_off + n]) /* XXX yuck */
+ return false;
+ return true;
+ }
/* require <tag> and consume it */
public void open(String name) throws XML.Oops {
- if (!isOpen || !name.contentEquals(tag))
+ if (!isOpen || !tag_eq(name))
throw new XML.Oops("expecting <"+name+"> but found " + str());
nextTag();
}
/* require </tag> and consume it */
public void close(String name) throws XML.Oops {
- if (isOpen || !name.contentEquals(tag))
+ if (isOpen || !tag_eq(name))
throw new XML.Oops("expecting </"+name+"> but found " + str());
nextTag();
}
/* require <tag> text </tag> and return text */
public XMLSequence read(String name) throws XML.Oops {
- int start = mTag.end();
+ int start = att_off + att_len;
open(name);
- tmp.adjust(start, mTag.start());
+ tmp.adjust(start, tag_off - 2);
close(name);
+ if (DEBUG) System.err.println("VAL ["+tmp+"]");
return tmp;
}
@@ -202,42 +220,41 @@ public class XML {
return false;
name.data = xml;
- name.offset = tag.offset;
- name.count = tag.count;
+ name.offset = tag_off;
+ name.count = tag_len;
value.data = xml;
- value.offset = mTag.end();
-
+ value.offset = att_off + att_len;
nextTag();
+ value.count = tag_off - value.offset - 2;
- value.count = mTag.start() - value.offset;
close(name);
return true;
}
public void close(XMLSequence name) throws XML.Oops {
if (isOpen)
- throw new XML.Oops("1expected </"+name+">, found <"+tag+">");
- if (!name.eq(tag))
- throw new XML.Oops("2expected </"+name+">, found </"+tag+">");
+ throw new XML.Oops("1expected </"+name+">, found "+str());
+ if (!tag_eq(name))
+ throw new XML.Oops("2expected </"+name+">, found "+str());
nextTag();
}
public boolean tryRead(String name, XMLSequence value) throws XML.Oops {
- if (!isOpen || !name.contentEquals(tag))
+ if (!isOpen || !tag_eq(name))
return false;
value.data = xml;
- value.offset = mTag.end();
+ value.offset = att_off + att_len;
nextTag();
- value.count = mTag.start() - value.offset;
+ value.count = tag_off - value.offset;
close(name);
return true;
}
/* eat the current tag and any children */
public void consume() throws XML.Oops {
- tmp.offset = mTag.start(2);
- tmp.count = mTag.end(2) - tmp.offset;
+ tmp.offset = tag_off;
+ tmp.count = tag_len;
nextTag();
while (isOpen)
consume();
@@ -247,30 +264,42 @@ public class XML {
/* format current begin/end tag as a string. for error messages */
String str() {
if (isOpen)
- return "<" + tag + ">";
+ return "<" + new String(seq.data, tag_off, tag_len) + ">";
else
- return "</" + tag + ">";
+ return "</" + new String(seq.data, tag_off, tag_len) + ">";
}
void nextTag() {
- /* hack: work around Android bug */
- mTag.reset(seq);
-
/* can't deal with comments or directives */
- if (!mTag.find(offset)) {
- tag.adjust(0,0);
+ int off = seq.next('<');
+ boolean opn = seq.isOpen();
+ int len = seq.name();
+ int att = seq.pos;
+ int nxt = seq.next('>');
+
+ /* don't advance if we're in a strange state */
+ if ((off < 0) || (len < 0) || (nxt < 0))
return;
- }
- isOpen = (mTag.start(1) == -1);
- tag.adjust(mTag.start(2), mTag.end(2));
- offset = mTag.end();
- }
+ if (!opn)
+ off++;
- /* G1: \ G2: tagname G3: attributes */
- static Pattern pTag = Pattern.compile("<(/)?([a-zA-Z:_][a-zA-Z0-9:\\-\\._]*)([^>]*)>",Pattern.DOTALL);
- /* G1: name G2: value */
- static Pattern pAttr = Pattern.compile("\\s*([a-zA-Z:_][a-zA-Z0-9:\\-\\._]*)=\"([^\"]*)\"");
+ tag_off = off;
+ tag_len = len;
+ isOpen = opn;
+
+ att_off = att;
+ att_len = nxt - att;
+
+ if (DEBUG) {
+ if (opn) {
+ System.err.println("TAG ["+new String(seq.data,tag_off,tag_len)+"]");
+ System.err.println("ATR ["+new String(seq.data,att_off,att_len)+"]");
+ } else {
+ System.err.println("tag ["+new String(seq.data,tag_off,tag_len)+"]");
+ }
+ }
+ }
static Charset cs = Charset.forName("UTF-8");
diff --git a/net/frotz/sonos/XMLSequence.java b/net/frotz/sonos/XMLSequence.java
@@ -20,6 +20,7 @@ class XMLSequence implements CharSequence {
char[] data;
int offset;
int count;
+ int pos;
public XMLSequence() { }
@@ -27,15 +28,18 @@ class XMLSequence implements CharSequence {
this.data = data;
offset = start;
count = end - start;
+ pos = offset;
}
public void init(XMLSequence s) {
data = s.data;
offset = s.offset;
count = s.count;
+ pos = offset;
}
void adjust(int start, int end) {
offset = start;
count = end - start;
+ pos = offset;
}
boolean eq(XMLSequence other) {
int count = this.count;
@@ -115,8 +119,9 @@ class XMLSequence implements CharSequence {
}
break;
}
+ pos = offset;
}
-
+
/* CharSequence interface */
public int length() {
return count;
@@ -136,4 +141,121 @@ class XMLSequence implements CharSequence {
return "";
return new String(data, offset, count);
}
+
+ /* Parsing Tools */
+
+ /* returns next offset after a match with c, or -1 if no match */
+ int next(char c) {
+ char[] x = data;
+ int n = pos;
+ int end = offset + count;
+ while (n < end) {
+ if (x[n++] == c) {
+ pos = n;
+ return n;
+ }
+ }
+ return -1;
+ }
+
+ /* advance pos beyond whitespace, returns updated position */
+ int space() {
+ char[] x = data;
+ int n = pos;
+ int end = offset + count;
+ while (n < end) {
+ switch (x[n]) {
+ case ' ':
+ case '\t':
+ case '\r':
+ case '\n':
+ n++;
+ break;
+ default:
+ pos = n;
+ return n;
+ }
+ }
+ pos = n;
+ return n;
+ }
+
+ /* matches an XML element or attribute name, returns length (-1==failure), pos will be after */
+ int name() {
+ char[] x = data;
+ int n = pos;
+ int end = offset + count;
+ if (n >= end)
+ return -1;
+ try {
+ if (xName0[x[n]] == 0)
+ return -1;
+ } catch (ArrayIndexOutOfBoundsException e) {
+ return -1;
+ }
+ while (++n < end) {
+ try {
+ if(xNameX[x[n]] == 1)
+ continue;
+ } catch (ArrayIndexOutOfBoundsException e) { }
+ break;
+ }
+ end = n - pos;
+ pos = n;
+ return end;
+ }
+
+ /* match =" returning position or -1 if no match */
+ int value() {
+ char[] x = data;
+ int n = pos;
+ int end = offset + count;
+ if (n >= end)
+ return -1;
+ if (x[n++] != '=')
+ return -1;
+ if (n >= end)
+ return -1;
+ if (x[n++] != '"')
+ return -1;
+ pos = n;
+ return n;
+ }
+
+ /* returns false (and advances) if data[pos] == '/', otherwise true */
+ boolean isOpen() {
+ if (data[pos] == '/') {
+ pos++;
+ return false;
+ } else {
+ return true;
+ }
+ }
+
+ static char xName0[];
+ static char xNameX[];
+
+ static {
+ xName0 = new char[127];
+ xNameX = new char[127];
+ int n;
+ for (n = 'a'; n <= 'z'; n++) {
+ xName0[n] = 1;
+ xNameX[n] = 1;
+ }
+ for (n = 'A'; n <= 'Z'; n++) {
+ xName0[n] = 1;
+ xNameX[n] = 1;
+ }
+ xName0['_'] = 1;
+ xName0[':'] = 1;
+
+ for (n = '0'; n <= '9'; n++) {
+ xNameX[n] = 1;
+ }
+ xNameX['_'] = 1;
+ xNameX[':'] = 1;
+ xNameX['-'] = 1;
+ xNameX['.'] = 1;
+ }
}