@BurnerR - hier die Rohdaten aus der Analyse, gepackt:
https://dwrox.net/wicked_rawdata.tar.gz
Vielleicht kannst du damit mehr anfangen?
Und auch mal der aktuelle, allerdings nicht 100% fehlerfreie und nicht optimale, Stand eines Filebuilders aus den gewonnenen Daten:
Allerdings ist diese Version noch nicht 100% fehlerfrei, ich debugge gerade noch ein Memory Leak bzw. Error....
Vielleicht mag sich jemand den Code genauer ansehen - speziell ob und wie man das
[src=c]while (readWord(&readerData, WORD)) { if (........) break; }[/src] wegoptimieren kann?
Meine Idee wäre noch, die Daten vor der Rückübersetzung zu sortieren, sodass alle Daten fortlaufend korrekt gelistet werden, ohne dass man innerhalb der Datei einen Suchlauf mit Vorsprung für X Einträge/Elemente machen muss.
verbessern, entfernen kann.
[src=c]//------------------------------------------------------------------------------
// Author: Jan Rie <jan@dwrox.net>
//------------------------------------------------------------------------------
#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>
#include <string.h>
#include <ctype.h>
#include <time.h>
#include <math.h>
//------------------------------------------------------------------------------
// Switches
// DEBUG: when true, everything that is emitted is also printed to stdout.
#define DEBUG false
// VERBOSE: when true, all currently buffered entries are dumped for every line.
#define VERBOSE false
// DOWRITEOUT: when true, the rebuilt text is written to OUTPUTFILE.
#define DOWRITEOUT true
// LINESTOPROCESS: maximum line number to rebuild; 0 disables the limit.
#define LINESTOPROCESS 0
// LOOKAHEADRANGE: after an entry is emitted, further records are read until one
// starts at least this many lines after the emitted entry.
#define LOOKAHEADRANGE 2000
// IDENTATIONBUFFER: size in bytes of the XML indentation buffer used by main.
#define IDENTATIONBUFFER 33
// Output file and the five input files produced by the analysis step.
#define OUTPUTFILE "output.txt"
#define DICTIONARYFILE "words.txt"
#define WIKITAGSFILE "wikitags.txt"
#define XMLTAGFILE "xmltags.txt"
#define XMLDATAFILE "xmldata.txt"
#define ENTITIESFILE "entities.txt"
// Buffers
// XMLTAG_BUFFER: bytes allocated for the name of one XML tag in readXMLtag.
#define XMLTAG_BUFFER 5120
// Counts of predefined const datatypes
// These are the first dimensions of the lookup tables below.
// NOTE(review): ENTITIES has no matching table in this file - confirm it is still needed.
#define FORMATS 8
#define ENTITIES 211
#define INDENTS 3
#define TEMPLATES 11
#define TAGTYPES 11
#define TAGCLOSINGS 3
// NOTE(review): presumably the tagTypes index of "{{math" - confirm against the parser.
#define MATHTAG 0
//------------------------------------------------------------------------------
/*
Wikipedia data types and such
See for wikitags
https://en.wikipedia.org/wiki/Help:Wiki_markup
https://en.wikipedia.org/wiki/Help:Cheatsheet
Reference for templates:
https://en.wikipedia.org/wiki/Help:Wiki_markup
*/
// Human-readable names of the text formats; the order matches `formats` below.
const char formatNames[FORMATS][15] = {
"Bold + Italic",
"Bold",
"Italic",
"Heading 6",
"Heading 5",
"Heading 4",
"Heading 3",
"Heading 2"
};
// Wiki markup for each text format, listed longest first within the quote and
// heading groups; the order matches `formatNames` above.
const char formats[FORMATS][7] = {
"'''''", // bold + italic
"'''", // bold
"''", // italic
"======", // Heading 6 - heading start
"=====",
"====",
"===",
"==" // Heading 2 - heading end
};
/*
NOTE: Indents are only used at the beginning of the line!
*/
// Single-character line prefixes; repeating the character nests one level deeper.
const char indents[INDENTS][2] = {
"*", // List element, multiple levels, "**" Element of element
"#", // Numbered list element, multiple levels = "##" "Element of element"
":", // "indent 1", multiple levels using ":::" = 3
};
// Opening markup of the layout templates known to the tool, ordered by length.
const char templates[TEMPLATES][18] = {
// NOTE: Should we cover template tags as well for shortening?
"{{-",
"{{align",
"{{break",
"{{clear",
"{{float",
"{{stack",
"{{outdent",
"{{plainlist",
"{{fake heading",
"{{endplainlist",
"{{unbulleted list"
};
// Human-readable names of the wiki tag types; the order matches `tagTypes` below.
const char wikiTagNames[TAGTYPES][19] = {
"Math type",
"Definition/Anchor",
"Table",
"Category",
"Media type",
"File type",
"Image type",
"Sound type",
"Wiktionary",
"Wikipedia",
"Link"
};
// Opening markup of each wiki tag type. The `tagType` value stored with a wiki
// tag entry is used as an index into this table when the tag is written out.
// Indices 0-2 open with a brace, indices 3 and above open with "[[".
const char tagTypes[TAGTYPES][18] = {
// NOTE: Handle definitions afterwards, in case we treat templates too
"{{math", // https://en.wikipedia.org/wiki/Wikipedia:Rendering_math
"{{", // Definition => {{Main|autistic savant}}
// NOTE(review): wiki tables normally open with "{|"; confirm that "{\t" is intended.
"{\t", // Table start => ! (headline), |- (separation, row/border), | "entry/ column data"
"[[category:",
"[[media:", // Media types start
"[[file:",
"[[image:", // [[Image:LeoTolstoy.jpg|thumb|150px|[[Leo Tolstoy|Leo Tolstoy]] 1828-1910]]
"[[sound:", // Media types end
//"#REDIRECT", // Redirect #REDIRECT [[United States]] (article) --- #REDIRECT [[United States#History]] (section)
"[[wiktionary:", // [[wiktionary:terrace|terrace]]s
"[[wikipedia:", // [[Wikipedia:Nupedia and Wikipedia]]
"[[" // Link => [[Autistic community#Declaration from the autism community|sent a letter to the United Nations]]
};
// Closing markup: index 0 closes brace tags, index 1 closes bracket tags and
// index 2 closes tables (main does not emit index 2 yet).
const char tagClosingsTypes[TAGCLOSINGS][3] = {
"}}",
"]]",
"|}"
};
//------------------------------------------------------------------------------
// One XML attribute: both strings are heap-allocated by readXMLtag.
typedef struct xmldatakv {
char *key; // attribute name
char *value; // attribute value, written between double quotes on output
} xmldatakv;
// One record read from any of the input files. Which fields are filled
// depends on the reader that created the entry.
typedef struct entry {
unsigned short elementType; // 0 XMLTAG, 1 WIKITAG, 2 WORD, 3 ENTITY
unsigned int position; // compared with main's per-line position counter; XML tags always get 1
unsigned int connectedTag; // -1 (stored as the largest unsigned value) means "not connected"
unsigned int start; // line number on which the element starts
unsigned int end; // line number on which the element ends; equals start for non-XML entries
unsigned int preSpacesCount; // number of spaces written before stringData
unsigned int spacesCount; // number of spaces written after stringData
unsigned int length; // read from the file; used to size the stringData allocation
unsigned int tagLength; // read from the wiki tag file; not used elsewhere in this file
short tagType; // index into tagTypes (wiki tags only)
short dataFormatType; // read from the file; not used elsewhere in this file
short ownFormatType; // read from the file; not used elsewhere in this file
short hasPipe; // non-zero: a '|' is written after the element
short isHandledTag; // set by main once the opening XML tag has been written
short isDataNode; // XML only: non-zero when the tag encloses text data
short isClosed; // XML only: read from the file; not used elsewhere in this file
short xmlDataCount; // number of elements in xmlData
int formatStart; // read from the file; not used elsewhere in this file
int formatEnd; // read from the file; not used elsewhere in this file
struct xmldatakv *xmlData; // heap array of XML attributes, NULL for non-XML entries
char *stringData; // heap string: tag name, wiki tag content, word or entity
} entry;
// Shared reader state: the open files, how far each one has been read, and the
// entries currently buffered per element type.
// NOTE: main initializes this struct positionally, so the field order matters.
typedef struct collection {
FILE *outputFile;
FILE *dictFile;
FILE *wtagFile;
FILE *xmltagFile;
FILE *xmldataFile;
FILE *entitiesFile;
// Total size of each input file in bytes.
long int dictSize;
long int wtagSize;
long int xmltagSize;
long int xmldataSize;
long int entitiesSize;
// File offset reached so far in each input file.
long int readDict;
long int readwtag;
long int readxmltag;
long int readxmldata;
long int readentities;
// Number of entries currently held in the arrays below.
unsigned int countWords;
unsigned int countWtag;
unsigned int countXmltag;
unsigned int countEntities;
// Heap arrays that the read* functions grow by one entry per record.
struct entry *entriesWords;
struct entry *entriesWtag;
struct entry *entriesXmltag;
struct entry *entriesEntities;
} collection;
// Element types; used as the `elementType` argument of the lookup helpers and
// stored in entry.elementType.
enum dataTypes {
XMLTAG = 0,
WIKITAG = 1,
WORD = 2,
ENTITY = 3
};
//------------------------------------------------------------------------------
// Function declarations
// Readers: each appends the next record of its input file to the collection
// and returns false when no record was added.
bool readXMLtag(struct collection*);
bool readWord(struct collection*);
bool readEntity(struct collection*);
bool readWikitag(struct collection*);
// Releases the buffered entry of the given type (second argument) that matches
// the line number (third) and position (fourth); returns true if one was removed.
bool freeEntryByLine(struct collection*, short, unsigned int, unsigned int);
// Returns the most recently appended entry of the given type, or NULL if none.
struct entry* getEntryByType(struct collection*, short);
// Returns the first buffered entry of the given type on the given line and
// position, or NULL; position 0 matches any position.
struct entry* getInLine(struct collection*, short, unsigned int, unsigned int);
// ----------------------------------------------------------------------------
// Removes one buffered entry and releases the memory it owns.
//
// readerData   reader state holding the per-type entry arrays
// elementType  XMLTAG, WIKITAG or WORD; any other value selects the entities
// lineNum      the entry must start or end on this line
// position     the entry must have this position; 0 matches any position,
//              mirroring getInLine (XML entries carry position 1 while callers
//              pass 0, so without the wildcard they would never be released)
//
// The array is searched front to back so that the entry removed here is the
// same one getInLine returns for the same arguments. Returns true when an
// entry was removed, false when nothing matched.
bool freeEntryByLine(struct collection *readerData, short elementType, unsigned int lineNum, unsigned int position) {
    unsigned int *countSlot = NULL;
    struct entry **itemsSlot = NULL;

    if (elementType == XMLTAG) {
        countSlot = &readerData->countXmltag;
        itemsSlot = &readerData->entriesXmltag;
    } else if (elementType == WORD) {
        countSlot = &readerData->countWords;
        itemsSlot = &readerData->entriesWords;
    } else if (elementType == WIKITAG) {
        countSlot = &readerData->countWtag;
        itemsSlot = &readerData->entriesWtag;
    } else {
        countSlot = &readerData->countEntities;
        itemsSlot = &readerData->entriesEntities;
    }

    unsigned int count = *countSlot;
    struct entry *items = *itemsSlot;
    if (items == NULL) return false;

    for (unsigned int i = 0; i < count; ++i) {
        struct entry *item = &items[i];
        const bool onLine = item->start == lineNum || item->end == lineNum;
        if (!onLine || (position != 0 && item->position != position)) continue;

        free(item->stringData);
        if (elementType == XMLTAG && item->xmlData != NULL) {
            for (short j = 0; j < item->xmlDataCount; ++j) {
                free(item->xmlData[j].key);
                free(item->xmlData[j].value);
            }
            free(item->xmlData);
        }

        // Close the gap left by the removed entry.
        if (i + 1 < count) {
            memmove(item, item + 1, sizeof(struct entry) * (count - i - 1));
        }
        --count;

        if (count == 0) {
            free(items);
            items = NULL;
        } else {
            // A failed shrink is harmless: the old, larger array stays valid.
            struct entry *shrunk = realloc(items, sizeof(struct entry) * count);
            if (shrunk != NULL) items = shrunk;
        }

        // Always publish both values; realloc may have moved the array.
        *countSlot = count;
        *itemsSlot = items;
        return true;
    }
    return false;
}
//------------------------------------------------------------------------------
struct entry* getEntryByType(struct collection *readerData, short elementType) {
if (elementType == XMLTAG) {
if (readerData->countXmltag == 0) return NULL;
return &readerData->entriesXmltag[readerData->countXmltag - 1];
} else if (elementType == WORD) {
if (readerData->countWords == 0) return NULL;
return &readerData->entriesWords[readerData->countWords - 1];
} else if (elementType == WIKITAG) {
if (readerData->countWtag == 0) return NULL;
return &readerData->entriesWtag[readerData->countWtag - 1];
} else {
if (readerData->countEntities == 0) return NULL;
return &readerData->entriesEntities[readerData->countEntities - 1];
}
return NULL;
}
//------------------------------------------------------------------------------
struct entry* getInLine(struct collection *readerData, short elementType, unsigned int lineNum, unsigned int position) {
unsigned int count = 0;
struct entry* items = NULL;
if (elementType == XMLTAG) {
count = readerData->countXmltag;
items = readerData->entriesXmltag;
} else if (elementType == WORD) {
count = readerData->countWords;
items = readerData->entriesWords;
} else if (elementType == WIKITAG) {
count = readerData->countWtag;
items = readerData->entriesWtag;
} else {
count = readerData->countEntities;
items = readerData->entriesEntities;
}
if (count == 0) return NULL;
if (position != 0) {
for (unsigned int i = 0 ; i < count; ++i) {
if (items.start == lineNum && items.end == lineNum && items.position == position) return &items;
}
} else {
for (unsigned int i = 0 ; i < count; ++i) {
if (items.start == lineNum || items.end == lineNum) return &items;
}
}
return NULL;
}
//------------------------------------------------------------------------------
// Main routine
int main(int argc, char *argv[]) {
printf("[ INFO ] Starting transpilling on file \"%s\".\n", OUTPUTFILE);
FILE *outputFile = fopen(OUTPUTFILE, "wb");
FILE *dictFile = fopen(DICTIONARYFILE, "rb");
FILE *wtagFile = fopen(WIKITAGSFILE, "rb");
FILE *xmltagFile = fopen(XMLTAGFILE, "rb");
FILE *xmldataFile = fopen(XMLDATAFILE, "rb");
FILE *entitiesFile = fopen(ENTITIESFILE, "rb");
fseek(dictFile, 0, SEEK_END);
fseek(wtagFile, 0, SEEK_END);
fseek(xmltagFile, 0, SEEK_END);
fseek(xmldataFile, 0, SEEK_END);
fseek(entitiesFile, 0, SEEK_END);
fpos_t dictFileSize;
fpos_t wtagFileSize;
fpos_t xmltagFileSize;
fpos_t xmldataFileSize;
fpos_t entitiesFileSize;
fgetpos(dictFile, &dictFileSize);
fgetpos(wtagFile, &wtagFileSize);
fgetpos(xmltagFile, &xmltagFileSize);
fgetpos(xmldataFile, &xmldataFileSize);
fgetpos(entitiesFile, &entitiesFileSize);
struct collection readerData = { outputFile,
dictFile, wtagFile, xmltagFile, xmldataFile, entitiesFile,
dictFileSize.__pos, wtagFileSize.__pos, xmltagFileSize.__pos, xmldataFileSize.__pos, entitiesFileSize.__pos,
0, 0, 0, 0, 0,
0, 0, 0, 0,
NULL, NULL, NULL, NULL
};
rewind(dictFile);
rewind(wtagFile);
rewind(xmltagFile);
rewind(xmldataFile);
rewind(entitiesFile);
time_t startTime = time(NULL);
unsigned int lineNum = 1;
unsigned int position = 1;
bool hasReplaced = false;
char preSpaces[32];
char subSpaces[32];
char indentation[IDENTATIONBUFFER];
int currentDepth = 0;
int previousDepth = currentDepth;
int lastWikiTypePos = -1;
short lastWikiType = -1;
struct entry* tmp = NULL;
unsigned int tmpMax = 0;
struct entry* preTmp = NULL;
struct entry* closeTag = NULL;
readXMLtag(&readerData);
readWikitag(&readerData);
readWord(&readerData);
readEntity(&readerData);
while (LINESTOPROCESS == 0 || lineNum <= LINESTOPROCESS) {
hasReplaced = false;
previousDepth = currentDepth;
#if VERBOSE
printf("\nLINE NUM: %d - %d\n", lineNum, position);
for (int i = 0, j = readerData.countXmltag; i < j; ++i) {
struct entry* item = &readerData.entriesXmltag;
printf("%d ::::: %d/%u --- %u - pipe: %d ===> %s\n", item->elementType, item->start, item->end, item->position, item->hasPipe, item->stringData);
}
for (int i = 0, j = readerData.countWtag; i < j; ++i) {
struct entry* item = &readerData.entriesWtag;
printf("%d ::::: %d/%u --- %u - pipe: %d ===> %s\n", item->elementType, item->start, item->end, item->position, item->hasPipe, item->stringData);
}
for (int i = 0, j = readerData.countWords; i < j; ++i) {
struct entry* item = &readerData.entriesWords;
printf("%d ::::: %d/%u --- %u - pipe: %d ===> %s\n", item->elementType, item->start, item->end, item->position, item->hasPipe, item->stringData);
}
for (int i = 0, j = readerData.countEntities; i < j; ++i) {
struct entry* item = &readerData.entriesEntities;
printf("%d ::::: %d/%u --- %u - pipe: %d ===> %s\n", item->elementType, item->start, item->end, item->position, item->hasPipe, item->stringData);
}
#endif
memset(indentation, ' ', IDENTATIONBUFFER);
indentation[currentDepth * 2] = '\0';
if ((tmp = getInLine(&readerData, XMLTAG, lineNum, 0)) != NULL) {
if (tmp->start == lineNum && !tmp->isHandledTag) {
tmp->isHandledTag = true;
#if DEBUG
printf("%s<%s", indentation, tmp->stringData);
#endif
#if DOWRITEOUT
fprintf(outputFile, "%s<%s", indentation, tmp->stringData);
#endif
for (int j = 0; j < tmp->xmlDataCount; ++j) {
#if DEBUG
printf(" %s=\"%s\"", tmp->xmlData[j].key, tmp->xmlData[j].value);
#endif
#if DOWRITEOUT
fprintf(outputFile, " %s=\"%s\"", tmp->xmlData[j].key, tmp->xmlData[j].value);
#endif
}
if (!tmp->isDataNode) {
if (tmp->start != tmp->end) {
#if DEBUG
printf("%s", ">\n");
#endif
#if DOWRITEOUT
fputc('\n', outputFile);
#endif
}
} else {
#if DEBUG
printf("%s", ">");
#endif
#if DOWRITEOUT
fputc('>', outputFile);
#endif
if (tmp->end == lineNum) hasReplaced = true;
}
if (tmp->start == tmp->end) closeTag = tmp;
else {
++currentDepth;
memset(indentation, ' ', IDENTATIONBUFFER);
indentation[currentDepth * 2] = '\0';
}
} else if (tmp->end == lineNum) {
closeTag = tmp;
}
}
if ((tmp = getInLine(&readerData, WIKITAG, lineNum, position)) != NULL) {
memset(preSpaces, ' ', sizeof(preSpaces));
memset(subSpaces, ' ', sizeof(subSpaces));
preSpaces[tmp->preSpacesCount] = '\0';
subSpaces[tmp->spacesCount] = '\0';
lastWikiTypePos = tmp->position;
lastWikiType = tmp->tagType;
tmpMax = tmp->start + LOOKAHEADRANGE;
#if DEBUG
printf("%s%s%s", preSpaces, tagTypes[tmp->tagType], tmp->stringData);
#endif
#if DOWRITEOUT
fprintf(outputFile, "%s%s%s", preSpaces, tagTypes[tmp->tagType], tmp->stringData);
#endif
if (tmp->hasPipe) {
#if DEBUG
printf("%s", "|");
#endif
#if DOWRITEOUT
fputc('|', outputFile);
#endif
}
while(readWikitag(&readerData)) {
if ((preTmp = getEntryByType(&readerData, WIKITAG)) != NULL && preTmp->start >= tmpMax) break;
}
freeEntryByLine(&readerData, WIKITAG, lineNum, position);
hasReplaced = true;
}
if ((tmp = getInLine(&readerData, WORD, lineNum, position)) != NULL) {
memset(preSpaces, ' ', sizeof(preSpaces));
memset(subSpaces, ' ', sizeof(subSpaces));
preSpaces[tmp->preSpacesCount] = '\0';
subSpaces[tmp->spacesCount] = '\0';
if (tmp->connectedTag != -1) {
lastWikiTypePos = tmp->position;
#if DEBUG
printf("%s%s%s", preSpaces, tmp->stringData, subSpaces);
#endif
#if DOWRITEOUT
fprintf(outputFile, "%s%s%s", preSpaces, tmp->stringData, subSpaces);
#endif
if (tmp->hasPipe) {
#if DEBUG
printf("%s", "|");
#endif
#if DOWRITEOUT
fputc('|', outputFile);
#endif
}
} else {
#if DEBUG
printf("%s%s%s", preSpaces, tmp->stringData, subSpaces);
#endif
#if DOWRITEOUT
fprintf(outputFile, "%s%s%s", preSpaces, tmp->stringData, subSpaces);
#endif
}
tmpMax = tmp->start + LOOKAHEADRANGE;
while(readWord(&readerData)) {
if ((preTmp = getEntryByType(&readerData, WORD)) != NULL && preTmp->start >= tmpMax) break;
}
freeEntryByLine(&readerData, WORD, lineNum, position);
hasReplaced = true;
}
if ((tmp = getInLine(&readerData, ENTITY, lineNum, position)) != NULL) {
memset(preSpaces, ' ', sizeof(preSpaces));
memset(subSpaces, ' ', sizeof(subSpaces));
preSpaces[tmp->preSpacesCount] = '\0';
subSpaces[tmp->spacesCount] = '\0';
if (tmp->connectedTag != -1) {
lastWikiTypePos = tmp->position;
#if DEBUG
printf("%s%s%s", preSpaces, tmp->stringData, subSpaces);
#endif
#if DOWRITEOUT
fprintf(outputFile, "%s%s%s", preSpaces, tmp->stringData, subSpaces);
#endif
if (tmp->hasPipe) {
#if DEBUG
printf("%s", "|");
#endif
#if DOWRITEOUT
fputc('|', outputFile);
#endif
}
} else {
#if DEBUG
printf("%s%s%s", preSpaces, tmp->stringData, subSpaces);
#endif
#if DOWRITEOUT
fprintf(outputFile, "%s%s%s", preSpaces, tmp->stringData, subSpaces);
#endif
}
tmpMax = tmp->start + LOOKAHEADRANGE;
while(readEntity(&readerData)) {
if ((preTmp = getEntryByType(&readerData, ENTITY)) != NULL && preTmp->start >= tmpMax) break;
}
freeEntryByLine(&readerData, ENTITY, lineNum, position);
hasReplaced = true;
}
if (lastWikiType != -1 && lastWikiTypePos != -1) {
tmp = getInLine(&readerData, WORD, lineNum, position + 1);
if (!tmp || (tmp && tmp->connectedTag == -1)) {
// TODO: Add table closing here.
char closing[3];
strcpy(closing, lastWikiType > 3 ? tagClosingsTypes[1] : tagClosingsTypes[0]);
#if DEBUG
printf("%s", closing);
#endif
#if DOWRITEOUT
fprintf(outputFile, "%s", closing);
#endif
lastWikiType = -1;
}
}
if (hasReplaced) {
++position;
continue;
}
if ((closeTag = getInLine(&readerData, XMLTAG, lineNum, 0)) != NULL) {
if (closeTag != NULL) {
if (closeTag->isDataNode && closeTag->end == lineNum) {
#if DEBUG
printf("</%s>\n", closeTag->stringData);
#endif
#if DOWRITEOUT
fprintf(outputFile, "</%s>\n", closeTag->stringData);
#endif
freeEntryByLine(&readerData, XMLTAG, lineNum, 0);
} else if (closeTag->end == closeTag->start) {
#if DEBUG
printf(" />\n");
#endif
#if DOWRITEOUT
fprintf(outputFile, " />\n");
#endif
freeEntryByLine(&readerData, XMLTAG, lineNum, 0);
} else if (!closeTag->isDataNode && closeTag->end == lineNum) {
if (currentDepth != 0) --currentDepth;
memset(indentation, ' ', IDENTATIONBUFFER);
indentation[currentDepth * 2] = '\0';
#if DEBUG
printf( "%s</%s>\n", indentation, closeTag->stringData);
#endif
#if DOWRITEOUT
fprintf(outputFile, "%s</%s>\n", indentation, closeTag->stringData);
#endif
freeEntryByLine(&readerData, XMLTAG, lineNum, 0);
} else if (closeTag->start != closeTag->end && closeTag->isDataNode) {
#if DEBUG
printf("%s", "\n");
#endif
#if DOWRITEOUT
fputc('\n', outputFile);
#endif
}
closeTag = NULL;
readXMLtag(&readerData);
}
} else {
#if DEBUG
printf("%s", "\n");
#endif
#if DOWRITEOUT
fputc('\n', outputFile);
#endif
if (currentDepth > 0 && currentDepth == previousDepth) --currentDepth;
}
position = 1;
++lineNum;
}
long int duration = difftime(time(NULL), startTime);
long int durHours = floor(duration / 3600);
long int durMinutes = floor((duration % 3600) / 60);
long int durSeconds = (duration % 3600) % 60;
printf("\n\n[STATUS] TRANSPILLING DATA PROCESS: %ldh %ldm %lds\n", durHours, durMinutes, durSeconds);
// Cleanup
fclose(readerData.outputFile);
fclose(readerData.dictFile);
fclose(readerData.wtagFile);
fclose(readerData.xmltagFile);
fclose(readerData.xmldataFile);
fclose(readerData.entitiesFile);
for (unsigned int i = 0; i < readerData.countXmltag; ++i) {
if (readerData.entriesXmltag.stringData) free(readerData.entriesXmltag.stringData);
for (unsigned int j = 0; j < readerData.entriesXmltag.xmlDataCount; ++j) {
if (readerData.entriesXmltag.xmlData[j].key) free(readerData.entriesXmltag.xmlData[j].key);
if (readerData.entriesXmltag.xmlData[j].value) free(readerData.entriesXmltag.xmlData[j].value);
}
if (readerData.entriesXmltag.xmlData) free(readerData.entriesXmltag.xmlData);
}
for (unsigned int i = 0; i < readerData.countWords; ++i) {
if (readerData.entriesWords.stringData) free(readerData.entriesWords.stringData);
}
for (unsigned int i = 0; i < readerData.countWtag; ++i) {
if (readerData.entriesWtag.stringData) free(readerData.entriesWtag.stringData);
}
for (unsigned int i = 0; i < readerData.countEntities; ++i) {
if (readerData.entriesEntities.stringData) free(readerData.entriesEntities.stringData);
}
free(readerData.entriesXmltag);
free(readerData.entriesWords);
free(readerData.entriesWtag);
free(readerData.entriesEntities);
return 0;
}
//------------------------------------------------------------------------------
bool readXMLtag(struct collection *readerData) {
if (readerData->readxmltag >= readerData->xmltagSize) return false;
if ((readerData->entriesXmltag = (struct entry *) realloc(readerData->entriesXmltag, sizeof(struct entry) * (readerData->countXmltag + 1))) == NULL) {
return false;
}
int returnValue = 0;
struct entry *element = &readerData->entriesXmltag[readerData->countXmltag];
element->elementType = XMLTAG;
element->stringData = NULL;
element->start = 0;
element->end = 0;
element->isClosed = false;
element->isDataNode = false;
element->xmlData = NULL;
element->xmlDataCount = 0;
element->position = 1;
element->connectedTag = -1;
element->isHandledTag = false;
element->hasPipe = false;
returnValue = fscanf(readerData->xmltagFile, "%u\t%u\t%hi\t%hi\t", &element->start, &element->end, &element->isClosed, &element->isDataNode);
if (returnValue == EOF) return false;
element = &readerData->entriesXmltag[readerData->countXmltag];
element->stringData = malloc(sizeof(char) * XMLTAG_BUFFER);
unsigned int dataCount = 0;
unsigned int start = 0;
unsigned int end = 0;
char tmpKey[256] = "\0";
char tmpValue[1280] = "\0";
if (readerData->readxmldata <= readerData->xmldataSize) {
fpos_t readBytes;
while (true) {
fgetpos(readerData->xmldataFile, &readBytes);
if (readBytes.__pos == readerData->xmldataSize) break;
returnValue = fscanf(readerData->xmldataFile, "%d\t%d\t", &start, &end);
if (returnValue == EOF) break;
if (start == element->start && end == element->end) {
fscanf(readerData->xmldataFile, "%s\t%[^\n]s", tmpKey, tmpValue);
element = &readerData->entriesXmltag[readerData->countXmltag];
element->xmlData = (struct xmldatakv *)realloc(element->xmlData, sizeof(struct xmldatakv) * (dataCount + 1));
element->xmlData[dataCount].key = malloc(sizeof(char) * (strlen(tmpKey) + 1));
strcpy(element->xmlData[dataCount].key, tmpKey);
element->xmlData[dataCount].value = malloc(sizeof(char) * (strlen(tmpValue) + 1));
strcpy(element->xmlData[dataCount].value, tmpValue);
++dataCount;
#if DEBUG
//printf("xmlData =>\t%s\t==>\t%s\n", tmpKey, tmpValue);
#endif
} else {
fsetpos(readerData->xmldataFile, &readBytes);
break;
}
}
element->xmlDataCount = dataCount;
fpos_t pos;
fgetpos(readerData->xmldataFile, &pos);
readerData->readxmldata = pos.__pos;
}
returnValue = fscanf(readerData->xmltagFile, "%[^\n]s\n", element->stringData);
if (returnValue == EOF) return false;
fpos_t pos;
fgetpos(readerData->xmltagFile, &pos);
readerData->xmltagSize = pos.__pos;
readerData->entriesXmltag[readerData->countXmltag] = *element;
++readerData->countXmltag;
return true;
}
//------------------------------------------------------------------------------
// Appends the next wiki tag record to readerData->entriesWtag.
//
// A record consists of twelve tab-separated numeric fields followed by the
// tag text, which runs to the end of the line.
//
// Returns true when an entry was added. Returns false at end of file, on a
// malformed record or when memory runs out; nothing is added in that case.
bool readWikitag(struct collection *readerData) {
    if (readerData->readwtag >= readerData->wtagSize) return false;

    struct entry *grown = realloc(readerData->entriesWtag, sizeof(struct entry) * ((size_t) readerData->countWtag + 1));
    if (grown == NULL) return false;
    readerData->entriesWtag = grown;

    struct entry *element = &grown[readerData->countWtag];
    memset(element, 0, sizeof(*element));
    element->elementType = WIKITAG;
    element->stringData = NULL;
    element->xmlData = NULL;
    element->connectedTag = (unsigned int) -1;
    element->dataFormatType = -1;
    element->ownFormatType = -1;

    // All twelve fields must parse; otherwise `length` would be garbage.
    const int parsed = fscanf(readerData->wtagFile, "%u\t%u\t%u\t%u\t%hi\t%hi\t%hi\t%d\t%d\t%u\t%u\t%hi\t", &element->position, &element->start, &element->preSpacesCount, &element->spacesCount, &element->tagType, &element->dataFormatType, &element->ownFormatType, &element->formatStart, &element->formatEnd, &element->tagLength, &element->length, &element->hasPipe);
    if (parsed != 12) return false;
    element->end = element->start;

    // Read the rest of the line. `length` is only the initial size: the
    // buffer grows when the text turns out to be longer than announced.
    size_t capacity = (size_t) element->length + 1;
    size_t used = 0;
    char *text = malloc(capacity);
    if (text == NULL) return false;
    int ch;
    while ((ch = fgetc(readerData->wtagFile)) != EOF && ch != '\n') {
        if (used + 1 == capacity) {
            char *bigger = realloc(text, capacity * 2);
            if (bigger == NULL) {
                free(text);
                return false;
            }
            text = bigger;
            capacity *= 2;
        }
        text[used++] = (char) ch;
    }
    if (used == 0 && ch == EOF) {
        free(text);
        return false;
    }
    text[used] = '\0';
    element->stringData = text;

    readerData->readwtag = ftell(readerData->wtagFile);
    ++readerData->countWtag;
    return true;
}
//------------------------------------------------------------------------------
// Appends the next word record to readerData->entriesWords.
//
// A record consists of eleven tab-separated numeric fields followed by the
// word itself, which runs to the end of the line.
//
// Returns true when an entry was added. Returns false at end of file, on a
// malformed record or when memory runs out; nothing is added in that case.
bool readWord(struct collection *readerData) {
    if (readerData->readDict >= readerData->dictSize) return false;

    struct entry *grown = realloc(readerData->entriesWords, sizeof(struct entry) * ((size_t) readerData->countWords + 1));
    if (grown == NULL) return false;
    readerData->entriesWords = grown;

    struct entry *element = &grown[readerData->countWords];
    memset(element, 0, sizeof(*element));
    element->elementType = WORD;
    element->stringData = NULL;
    element->xmlData = NULL;
    element->connectedTag = (unsigned int) -1;

    // "%u" also accepts "-1" and stores it as the largest unsigned value,
    // which is what "not connected" is compared against.
    const int parsed = fscanf(readerData->dictFile, "%u\t%u\t%u\t%u\t%u\t%u\t%hi\t%hi\t%d\t%d\t%hi\t", &element->position, &element->start, &element->connectedTag, &element->preSpacesCount, &element->spacesCount, &element->length, &element->dataFormatType, &element->ownFormatType, &element->formatStart, &element->formatEnd, &element->hasPipe);
    if (parsed != 11) return false;
    element->end = element->start;

    // Read the rest of the line. `length` is only the initial size: the
    // buffer grows when the word turns out to be longer than announced.
    size_t capacity = (size_t) element->length + 1;
    size_t used = 0;
    char *text = malloc(capacity);
    if (text == NULL) return false;
    int ch;
    while ((ch = fgetc(readerData->dictFile)) != EOF && ch != '\n') {
        if (used + 1 == capacity) {
            char *bigger = realloc(text, capacity * 2);
            if (bigger == NULL) {
                free(text);
                return false;
            }
            text = bigger;
            capacity *= 2;
        }
        text[used++] = (char) ch;
    }
    if (used == 0 && ch == EOF) {
        free(text);
        return false;
    }
    text[used] = '\0';
    element->stringData = text;

    readerData->readDict = ftell(readerData->dictFile);
    ++readerData->countWords;
    return true;
}
//------------------------------------------------------------------------------
// Appends the next entity record to readerData->entriesEntities.
//
// A record consists of ten tab-separated numeric fields followed by the
// entity text, which runs to the end of the line.
//
// Returns true when an entry was added. Returns false at end of file, on a
// malformed record or when memory runs out; nothing is added in that case.
bool readEntity(struct collection *readerData) {
    // Compare the offsets themselves. Comparing the addresses of the two
    // members was always true, so no entity was ever read.
    if (readerData->readentities >= readerData->entitiesSize) return false;

    struct entry *grown = realloc(readerData->entriesEntities, sizeof(struct entry) * ((size_t) readerData->countEntities + 1));
    if (grown == NULL) return false;
    readerData->entriesEntities = grown;

    struct entry *element = &grown[readerData->countEntities];
    memset(element, 0, sizeof(*element));
    element->elementType = ENTITY;
    element->stringData = NULL;
    element->xmlData = NULL;
    element->connectedTag = (unsigned int) -1;

    const int parsed = fscanf(readerData->entitiesFile, "%u\t%u\t%u\t%u\t%u\t%hi\t%hi\t%d\t%d\t%hi\t", &element->position, &element->start, &element->connectedTag, &element->preSpacesCount, &element->spacesCount, &element->dataFormatType, &element->ownFormatType, &element->formatStart, &element->formatEnd, &element->hasPipe);
    if (parsed != 10) return false;
    element->end = element->start;

    // Read the rest of the line; 24 bytes is only the initial size and the
    // buffer grows for longer entities.
    size_t capacity = 24;
    size_t used = 0;
    char *text = malloc(capacity);
    if (text == NULL) return false;
    int ch;
    while ((ch = fgetc(readerData->entitiesFile)) != EOF && ch != '\n') {
        if (used + 1 == capacity) {
            char *bigger = realloc(text, capacity * 2);
            if (bigger == NULL) {
                free(text);
                return false;
            }
            text = bigger;
            capacity *= 2;
        }
        text[used++] = (char) ch;
    }
    if (used == 0 && ch == EOF) {
        free(text);
        return false;
    }
    text[used] = '\0';
    element->stringData = text;

    readerData->readentities = ftell(readerData->entitiesFile);
    ++readerData->countEntities;
    return true;
}[/src]