Blokkal
an Extendable KDE Blogging Client
SourceForge.net Logo

util.cpp

00001 /***************************************************************************
00002  *   Copyright (C) 2006 by Martin Mueller                                  *
00003  *   orvio@orvio.de                                                        *
00004  *                                                                         *
00005  *   This program is free software; you can redistribute it and/or modify  *
00006  *   it under the terms of the GNU General Public License as published by  *
00007  *   the Free Software Foundation; either version 2 of the License, or     *
00008  *   (at your option) any later version.                                   *
00009  *                                                                         *
00010  *   This program is distributed in the hope that it will be useful,       *
00011  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
00012  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
00013  *   GNU General Public License for more details.                          *
00014  *                                                                         *
00015  *   You should have received a copy of the GNU General Public License     *
00016  *   along with this program; if not, write to the                         *
00017  *   Free Software Foundation, Inc.,                                       *
00018  *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
00019  ***************************************************************************/
00020 #include "util.h"
00021 
00022 #include <kdebug.h>
00023 
00024 QString Blokkal::Io::Util::decodeUtf8Data( const QByteArray & rawData, bool treatAsFormData )
00025 {
00026         const unsigned long inputLength = rawData.length();
00027         QString decodedData;
00028         decodedData.reserve( inputLength );
00029         unsigned char byte = 0;
00030         unsigned int unicodeChar = 0;
00031         char more = 0;
00032         
00033         for ( unsigned long i = 0; i < inputLength; i++ ) {
00034                 if( treatAsFormData ) {
00035                         unsigned char character;
00036                         unsigned char highNibble;
00037                         unsigned char lowNibble;
00038                         //kDebug() << rawData.at( i ).cell() << endl;
00039                         switch ( character = rawData.at( i ) ) {
00040                         case '%':
00041                                 i++;
00042                                 character = rawData.at( i );
00043                                 if( character >= '0' && character <= '9' ) {
00044                                         highNibble = character - '0';
00045                                 }
00046                                 else {
00047                                         highNibble = ( character - 65 + 10 ) & 0xF;
00048                                 }
00049                                 i++;
00050                                 character = rawData.at( i );
00051                                 if( character >= '0' && character <= '9' ) {
00052                                         lowNibble = character - '0';
00053                                 }
00054                                 else {
00055                                         lowNibble = ( character - 65 + 10 ) & 0xF;
00056                                 }
00057                                 byte = ( highNibble << 4 ) | lowNibble ;
00058                                 break ;
00059                         case '+':
00060                                 byte = ' ';
00061                                 break ;
00062                         default:
00063                                 byte = character;
00064                         }
00065                 }
00066                 else {
00067                         byte = rawData.at( i );
00068                 }
00069                 
00070                 // 10******* => trailing byte
00071                 // 6 more bits for unicodeChar
00072                 if ( ( byte & 0xc0 ) == 0x80 && more > 0 ) { 
00073                         unicodeChar = ( unicodeChar << 6 ) | ( byte & 0x3f ); 
00074                         more--;
00075                         //was last byte
00076                         if ( more <= 0 ) {
00077                                 decodedData.append( QChar( unicodeChar ) );
00078                         }
00079                 }
00080                 // 0******* => US-ASCII character
00081                 else if ( ( byte & 0x80 ) == 0x00 ) {
00082                         decodedData.append( byte ) ;
00083                 }
00084                 // 110***** => 1 byte
00085                 else if ( ( byte & 0xe0 ) == 0xc0 ) {   
00086                         unicodeChar = byte & 0x1f;
00087                         more = 1;
00088                 }
00089                 // 1110**** => 2 bytes
00090                 else if ( ( byte & 0xf0 ) == 0xe0 ) {
00091                         unicodeChar = byte & 0x0f;
00092                         more = 2;
00093                 }
00094                 // 11110*** => 3 bytes
00095                 else if ( ( byte & 0xf8 ) == 0xf0 ) {
00096                         unicodeChar = byte & 0x07;
00097                         more = 3;
00098                 }
00099                 // 111110** => 4 bytes
00100                 else if ( ( byte & 0xfc ) == 0xf8 ) {
00101                         unicodeChar = byte & 0x03;
00102                         more = 4;
00103                 }
00104                 // 1111110* => 5 bytes
00105                 else if ( ( byte & 0xfe ) == 0xfc ) {
00106                         unicodeChar = byte & 0x01;
00107                         more = 5;
00108                 }
00109                 else {
00110                         more = 0;
00111                         kError() << k_funcinfo << "Encountered invalid byte: " << byte << endl;
00112                 }
00113 
00114         }
00115         return decodedData;
00116 }