diff --git a/src/org/oscim/database/oscimap/MapDatabase.java b/src/org/oscim/database/oscimap/MapDatabase.java index f6acc622..f8dc949e 100644 --- a/src/org/oscim/database/oscimap/MapDatabase.java +++ b/src/org/oscim/database/oscimap/MapDatabase.java @@ -19,7 +19,6 @@ import java.io.IOException; import java.net.SocketException; import java.net.SocketTimeoutException; import java.net.UnknownHostException; -import java.nio.charset.Charset; import java.util.Arrays; import org.oscim.core.BoundingBox; @@ -33,6 +32,7 @@ import org.oscim.database.IMapDatabaseCallback; import org.oscim.database.MapInfo; import org.oscim.database.MapOptions; import org.oscim.layers.tile.MapTile; +import org.oscim.utils.UTF8Decoder; import android.os.Environment; import android.os.SystemClock; @@ -74,9 +74,13 @@ public class MapDatabase implements IMapDatabase { private final boolean debug = false; private LwHttp lwHttp; - //private final WayData mWay = new WayData(); - private final MapElement mElem = new MapElement(); + private final UTF8Decoder mStringDecoder; + private final MapElement mElem; + public MapDatabase(){ + mStringDecoder = new UTF8Decoder(); + mElem = new MapElement(); + } @Override public QueryResult executeQuery(MapTile tile, IMapDatabaseCallback mapDatabaseCallback) { QueryResult result = QueryResult.SUCCESS; @@ -175,7 +179,6 @@ public class MapDatabase implements IMapDatabase { } mOpen = true; - initDecorder(); return OpenResult.SUCCESS; @@ -680,14 +683,11 @@ public class MapDatabase implements IMapDatabase { return result; } - private final static Charset UTF8 = Charset.forName("UTF-8"); - // TODO: use own String builder that reuses the char conversion array. 
- private String decodeString() throws IOException { final int size = decodeVarint32(); lwHttp.readBuffer(size); + final String result = mStringDecoder.decode(lwHttp.buffer, lwHttp.bufferPos, size); - final String result = new String(lwHttp.buffer, lwHttp.bufferPos, size, UTF8); lwHttp.bufferPos += size; mBytesProcessed += size; return result; diff --git a/src/org/oscim/utils/UTF8Decoder.java b/src/org/oscim/utils/UTF8Decoder.java new file mode 100644 index 00000000..63add61b --- /dev/null +++ b/src/org/oscim/utils/UTF8Decoder.java @@ -0,0 +1,136 @@ + +package org.oscim.utils; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// modified new String so that it reuses temporary buffer instead of reallocation +// when guess for size was incorrect. 
/**
 * Decodes UTF-8 encoded bytes into {@link String}s, reusing one scratch
 * {@code char[]} between calls instead of allocating a new one per string.
 *
 * <p>Malformed input never throws: illegal lead bytes, bad continuation
 * bytes, truncated sequences, surrogates not encoded as 3-byte sequences and
 * values above U+10FFFF are each replaced by U+FFFD. Overlong encodings are
 * accepted.
 *
 * <p>The scratch buffer is instance state, so one instance must not be used
 * by several threads at the same time without external synchronization.
 */
public class UTF8Decoder {
    private static final char REPLACEMENT_CHAR = (char) 0xfffd;

    /** Scratch buffer reused between calls; replaced by a larger one on demand. */
    char[] mBuffer;

    /** Capacity of {@link #mBuffer} in chars; 0 while no buffer is allocated. */
    int mBufferSize = 0;

    /**
     * Decodes {@code byteCount} bytes of {@code data}, starting at
     * {@code offset}, as UTF-8.
     *
     * @param data      array holding the encoded bytes
     * @param offset    index of the first byte to decode
     * @param byteCount number of bytes to decode
     * @return the decoded string; empty when {@code byteCount} is 0
     * @throws IllegalArgumentException if {@code offset} or {@code byteCount}
     *         is negative or the range exceeds {@code data}
     */
    public String decode(byte[] data, int offset, int byteCount) {
        if ((offset | byteCount) < 0 || byteCount > data.length - offset) {
            throw new IllegalArgumentException("Brrr " + data.length
                    + " " + offset + " " + byteCount);
        }

        // Each emitted char consumes at least one input byte (a surrogate
        // pair consumes four), so byteCount chars are always sufficient.
        // Allocate only when the current buffer is missing or too small and
        // record the new capacity so that later calls can reuse it.
        if (mBuffer == null || mBuffer.length < byteCount) {
            mBuffer = new char[byteCount];
        }
        mBufferSize = mBuffer.length;

        final char[] v = mBuffer;
        final int last = offset + byteCount;
        int idx = offset;
        int s = 0;

        outer: while (idx < last) {
            byte b0 = data[idx++];

            if ((b0 & 0x80) == 0) {
                // 0xxxxxxx
                // Range: U-00000000 - U-0000007F
                v[s++] = (char) (b0 & 0xff);
                continue;
            }

            int utfCount = trailingByteCount(b0);
            if (utfCount < 0) {
                // Illegal values 0x8*, 0x9*, 0xa*, 0xb*, 0xfd-0xff
                v[s++] = REPLACEMENT_CHAR;
                continue;
            }

            // The sequence announces more bytes than the input still has.
            if (idx + utfCount > last) {
                v[s++] = REPLACEMENT_CHAR;
                break;
            }

            // Extract usable bits from b0
            int val = b0 & (0x1f >> (utfCount - 1));
            for (int i = 0; i < utfCount; i++) {
                byte b = data[idx++];
                if ((b & 0xC0) != 0x80) {
                    v[s++] = REPLACEMENT_CHAR;
                    idx--; // Put the input byte back, it may start a new sequence
                    continue outer;
                }
                // Push new bits in from the right side
                val = (val << 6) | (b & 0x3f);
            }

            // Note: overlong encodings are allowed. To disallow them, check
            // that val is greater than or equal to the minimum value for
            // each count:
            //
            // count    min value
            // -----   ----------
            //   1           0x80
            //   2          0x800
            //   3        0x10000
            //   4       0x200000
            //   5      0x4000000

            // Allow surrogate values (0xD800 - 0xDFFF) to
            // be specified using 3-byte UTF values only
            if ((utfCount != 2) && (val >= 0xD800) && (val <= 0xDFFF)) {
                v[s++] = REPLACEMENT_CHAR;
                continue;
            }

            // Reject chars greater than the Unicode maximum of U+10FFFF.
            if (val > 0x10FFFF) {
                v[s++] = REPLACEMENT_CHAR;
                continue;
            }

            // Encode chars from U+10000 up as surrogate pairs
            if (val < 0x10000) {
                v[s++] = (char) val;
            } else {
                int x = val & 0xffff;
                int u = (val >> 16) & 0x1f;
                int w = (u - 1) & 0xffff;
                int hi = 0xd800 | (w << 6) | (x >> 10);
                int lo = 0xdc00 | (x & 0x3ff);
                v[s++] = (char) hi;
                v[s++] = (char) lo;
            }
        }

        // String copies the chars, so the scratch buffer can be reused safely.
        return new String(v, 0, s);
    }

    /**
     * Returns how many continuation bytes follow the given lead byte, or -1
     * when the byte is not a valid multi-byte lead.
     *
     * 110xxxxx (10xxxxxx)+
     * Range: U-00000080 - U-000007FF (count == 1)
     * Range: U-00000800 - U-0000FFFF (count == 2)
     * Range: U-00010000 - U-001FFFFF (count == 3)
     * Range: U-00200000 - U-03FFFFFF (count == 4)
     * Range: U-04000000 - U-7FFFFFFF (count == 5)
     */
    private static int trailingByteCount(byte lead) {
        if ((lead & 0xe0) == 0xc0) {
            return 1;
        }
        if ((lead & 0xf0) == 0xe0) {
            return 2;
        }
        if ((lead & 0xf8) == 0xf0) {
            return 3;
        }
        if ((lead & 0xfc) == 0xf8) {
            return 4;
        }
        if ((lead & 0xfe) == 0xfc) {
            return 5;
        }
        return -1;
    }
}