OpenSSL VPN Serveurs de messagerie |
Libxml2/Libxml2-html-parsing
Dans le répertoire \libxml2-2.6.23\win32\bin.msvc se trouve un exécutable permettant le parsing d'un fichier HTML.
Le parsing d'une page contenant des erreurs grâves peut se faire en ne créant pas des fonctions SAX error callbacks correspondantes. Voici le code source d'un autre programme faisant aussi l'analyse syntaxique de pages HTML #include <stdio.h> #include <libxml/HTMLparser.h> int iDepth=0; static void endElementDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name) { iDepth--; } static void startElementDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, const xmlChar **atts) { iDepth++; fprintf(stdout,"%2i.",iDepth); for (int i = 0; i < iDepth; i++) fprintf(stdout," "); fprintf(stdout, "<%s", (char *) name); if (atts != NULL) { for (i = 0;(atts[i] != NULL);i++) { fprintf(stdout, ", %s='", atts[i++]); if (atts[i] != NULL) fprintf(stdout, "%s'", atts[i]); } } fprintf(stdout, ">\n"); } static xmlSAXHandler debugSAXHandlerStruct = { NULL, /* internalSubset */ NULL, /* isStandalone */ NULL, /* hasInternalSubset */ NULL, /* hasExternalSubset */ NULL, /* resolveEntity */ NULL, /* getEntity */ NULL, /* entityDecl */ NULL, /* notationDecl */ NULL, /* attributeDecl */ NULL, /* elementDecl */ NULL, /* unparsedEntityDecl */ NULL, /* setDocumentLocator */ NULL, /* startDocument */ NULL, /* endDocument */ startElementDebug, /* startElement */ endElementDebug, /* endElement */ NULL, /* reference */ NULL, /* characters */ NULL, /* ignorableWhitespace */ NULL, /* processingInstruction */ NULL, /* comment */ NULL, /* xmlParserWarning */ NULL, /* xmlParserError */ NULL, /* xmlParserError */ NULL, /* getParameterEntity */ NULL, /* cdataBlock */ NULL, /* externalSubset */ 1, /* initialized */ NULL, /* private */ NULL, /* startElementNsSAX2Func */ NULL, /* endElementNsSAX2Func */ NULL /* xmlStructuredErrorFunc */ }; xmlSAXHandlerPtr debugSAXHandler = &debugSAXHandlerStruct; int main(int argc, char* argv[]) { FILE *f; f = fopen(argv[1], "r"); if (f != NULL) { int res, size = 1024; char chars[1024]; htmlParserCtxtPtr ctxt; res = fread(chars, 1, 4, f); if (res > 0) { ctxt = 
htmlCreatePushParserCtxt(debugSAXHandler, NULL,chars, 4,argv[1],XML_CHAR_ENCODING_UTF8); while ((res = fread(chars, 1, size, f)) > 0) { htmlParseChunk(ctxt, chars, res, 0); } xmlParseChunk(ctxt, chars, 0, 1); xmlFreeParserCtxt(ctxt); } } return 0; } |