#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAXBUFLEN 1024
/**
 * Extract the text that follows the first occurrence of "Location:" in an
 * HTTP reply, up to the end of that line.
 *
 * source: NUL-terminated text of the reply (may be NULL).
 *
 * Blanks and tabs after the colon are skipped, and a trailing '\r' (CRLF
 * line ending) is not included in the result. If the line is the last one
 * and has no '\n', the value runs to the end of the string.
 *
 * Returns a newly malloc'd, NUL-terminated copy of the value; the caller
 * owns it and must free() it. Returns NULL when source is NULL, when
 * "Location:" does not occur, or when the allocation fails.
 */
char* getLocation(char* source)
{
    static const char header[] = "Location:";
    const char *start;
    const char *end;
    size_t len;
    char *res;

    if (source == NULL) {
        return NULL;
    }

    start = strstr(source, header);
    if (start == NULL) {
        return NULL;
    }

    /* Step over the header name, then over any blanks before the value,
       rather than assuming exactly one space follows the colon. */
    start += sizeof header - 1;
    while (*start == ' ' || *start == '\t') {
        start++;
    }

    /* The value ends at the newline, or at the end of the text when the
       header is on the last, unterminated line. */
    end = strchr(start, '\n');
    if (end == NULL) {
        end = start + strlen(start);
    }
    if (end > start && end[-1] == '\r') {
        end--;
    }

    len = (size_t)(end - start);
    res = malloc(len + 1);
    if (res == NULL) {
        return NULL;
    }
    memcpy(res, start, len);
    res[len] = '\0';
    return res;
}
/**
 * Read an HTTP reply from "source.txt", print the value returned by
 * getLocation(), and split the part of the URL after "://" into a host
 * name and a port number.
 *
 * Returns EXIT_FAILURE when the file cannot be opened or nothing could be
 * read from it, EXIT_SUCCESS otherwise.
 */
int main(void)
{
    char source[MAXBUFLEN];
    char host[100];
    int port = 0;
    char *location;
    const char *authority;
    size_t newLen;
    FILE *fp = fopen("source.txt", "r");

    if (fp == NULL) {
        perror("source.txt");
        return EXIT_FAILURE;
    }

    /* Read at most MAXBUFLEN - 1 bytes so the terminator always fits. */
    newLen = fread(source, sizeof(char), MAXBUFLEN - 1, fp);
    fclose(fp);
    if (newLen == 0) {
        fputs("Error reading file", stderr);
        return EXIT_FAILURE;
    }
    source[newLen] = '\0';

    location = getLocation(source);
    if (location == NULL) {
        /* Defensive: nothing to parse if no value was produced. */
        printf("no link\n");
        return EXIT_SUCCESS;
    }
    printf("getLocation result: %s\n", location);

    /* Keep `location` untouched so the buffer can still be freed. */
    authority = strstr(location, "://");
    if (authority == NULL) {
        printf("no link\n");
    } else {
        authority += 3;
        if (strchr(authority, ':') == NULL) {
            printf("delimiter not found\n");
        } else if (sscanf(authority, "%99[^:]:%d", host, &port) == 2) {
            /* %99 bounds the write into host[100]. */
            printf("host: %s | port: %d\n", host, port);
        } else {
            fputs("invalid host:port\n", stderr);
        }
    }

    free(location);
    return EXIT_SUCCESS;
}
Extract the Location header from an HTTP response
Вопрос
I have the following http reply saved in a local file called source.txt:
HTTP/1.1 301 Moved
Connection: close
Content-length: 111
Location: https://11.12.13.14:81/
Content-type: text/html; charset="utf-8"
<html><head><META HTTP-EQUIV="refresh" CONTENT="0;URL=https://11.12.13.14:81/"></head><body></body></html>
and the following code:
#include <stdio.h>
#include <stdlib.h>
#define MAXBUFLEN 1024
/**
 * Return the text that follows the first occurrence of "Location:" in
 * source, up to the end of that line.
 *
 * source: NUL-terminated text to search (may be NULL).
 *
 * Blanks and tabs directly after the colon are skipped. The value stops at
 * the first '\n', or at the end of the string when there is none; a '\r'
 * immediately before that point is dropped.
 *
 * Returns a malloc'd, NUL-terminated string that the caller must free(),
 * or NULL when source is NULL, "Location:" is absent, or malloc fails.
 */
char* getLocation(char* source)
{
    static const char header[] = "Location:";
    const char *start;
    const char *end;
    size_t len;
    char *res;

    if (source == NULL) {
        return NULL;
    }

    start = strstr(source, header);
    if (start == NULL) {
        return NULL;
    }

    /* Skip the header name and any blanks instead of a fixed 10 bytes. */
    start += sizeof header - 1;
    while (*start == ' ' || *start == '\t') {
        start++;
    }

    end = strchr(start, '\n');
    if (end == NULL) {
        /* Header on the last line without a newline. */
        end = start + strlen(start);
    }
    if (end > start && end[-1] == '\r') {
        end--;
    }

    len = (size_t)(end - start);
    res = malloc(len + 1);
    if (res == NULL) {
        return NULL;
    }
    memcpy(res, start, len);
    res[len] = '\0';
    return res;
}
/**
 * Return the text that follows the first "://" in source, up to the end of
 * that line.
 *
 * source: NUL-terminated text to search (may be NULL).
 *
 * The result stops at the first '\n' after the separator, or at the end of
 * the string when there is no newline (for example when source is a single
 * URL); a '\r' immediately before that point is dropped.
 *
 * Returns a malloc'd, NUL-terminated string that the caller must free(),
 * or NULL when source is NULL, "://" is absent, or malloc fails.
 */
char* getData(char* source)
{
    static const char separator[] = "://";
    const char *start;
    const char *end;
    size_t len;
    char *res;

    if (source == NULL) {
        return NULL;
    }

    start = strstr(source, separator);
    if (start == NULL) {
        return NULL;
    }
    start += sizeof separator - 1;

    /* A bare URL has no newline; fall back to the end of the string
       instead of doing arithmetic on a NULL pointer. */
    end = strchr(start, '\n');
    if (end == NULL) {
        end = start + strlen(start);
    }
    if (end > start && end[-1] == '\r') {
        end--;
    }

    len = (size_t)(end - start);
    res = malloc(len + 1);
    if (res == NULL) {
        return NULL;
    }
    memcpy(res, start, len);
    res[len] = '\0';
    return res;
}
/**
 * Read an HTTP reply from "source.txt", print the value returned by
 * getLocation(), pass it to getData(), and split that result into a host
 * name and a port number.
 *
 * Returns EXIT_FAILURE when the file cannot be opened or nothing could be
 * read from it, EXIT_SUCCESS otherwise.
 */
int main(void)
{
    char source[MAXBUFLEN];
    char host[100];
    int port = 0;
    char *line;
    char *res;
    size_t newLen;
    FILE *fp = fopen("source.txt", "r");

    if (fp == NULL) {
        perror("source.txt");
        return EXIT_FAILURE;
    }

    /* Read at most MAXBUFLEN - 1 bytes so the terminator always fits. */
    newLen = fread(source, sizeof(char), MAXBUFLEN - 1, fp);
    fclose(fp);
    if (newLen == 0) {
        fputs("Error reading file", stderr);
        return EXIT_FAILURE;
    }
    source[newLen] = '\0';

    line = getLocation(source);
    if (line == NULL) {
        /* Defensive: nothing to parse if no value was produced. */
        printf("no link\n");
        return EXIT_SUCCESS;
    }
    printf("getLocation result: %s\n", line);

    if (strstr(line, "://") == NULL) {
        printf("no link\n");
        free(line);
        return EXIT_SUCCESS;
    }

    /* The original author flagged this call as the failing one. */
    res = getData(line);
    if (res == NULL) {
        printf("no link\n");
        free(line);
        return EXIT_SUCCESS;
    }
    printf("getData result: %s\n", res);

    if (strchr(res, ':') == NULL) {
        printf("delimiter not found\n");
    } else if (sscanf(res, "%99[^:]:%d", host, &port) == 2) {
        /* %99 bounds the write into host[100]. */
        printf("host: %s | port: %d\n", host, port);
    } else {
        fputs("invalid host:port\n", stderr);
    }

    free(res);
    free(line);
    return EXIT_SUCCESS;
}
The program works, but the code is very ugly.
Is there any way to improve it so that it does not need so many operations?
I mean somehow merging the two functions getLocation and getData.
EDIT: my mistake, getData must extract from res not from source
Решение 3
Другие советы
Something like this would be the obvious way:
/**
 * Return the text that follows the first occurrence of label in source,
 * up to the end of that line.
 *
 * source: NUL-terminated text to search (may be NULL).
 * label:  NUL-terminated marker whose following text is wanted (may be NULL).
 *
 * The result stops at the first '\n' after the label, or at the end of the
 * string when there is no newline; a '\r' immediately before that point is
 * dropped.
 *
 * Returns a malloc'd, NUL-terminated string that the caller must free(),
 * or NULL when either argument is NULL or label does not occur in source.
 * If the allocation fails, a message is written to stderr and the program
 * exits with EXIT_FAILURE.
 */
char * getstuff(char * source, char * label) {
    const char *start;
    const char *end;
    size_t len;
    char *res;

    if (source == NULL || label == NULL) {
        return NULL;
    }

    start = strstr(source, label);
    if (start == NULL) {
        return NULL;
    }
    start += strlen(label);

    /* No newline means the value runs to the end of the string. */
    end = strchr(start, '\n');
    if (end == NULL) {
        end = start + strlen(start);
    }
    if (end > start && end[-1] == '\r') {
        end--;
    }

    len = (size_t)(end - start);
    res = malloc(len + 1);
    if (res == NULL) {
        fputs("Couldn't allocate memory.", stderr);
        exit(EXIT_FAILURE);
    }
    memcpy(res, start, len);
    res[len] = '\0';
    return res;
}
/* Forward source to getstuff() with the label "Location: " and hand back
   whatever getstuff() returns. */
char* getLocation(char* source) {
    char *location = getstuff(source, "Location: ");

    return location;
}
/* Forward source to getstuff() with the label "://" and hand back
   whatever getstuff() returns. */
char* getData(char* source) {
    char *data = getstuff(source, "://");

    return data;
}
or just keep getstuff() and leave out getLocation() and getData() entirely, if you're only going to call each function once.
Assuming that you are working on Linux, I have an answer in awk:
awk '/:\/\//{print $2}' source.txt
will act just like your getLocation(), and I suspect getData() should actually give you the HTML content (but your code returns the same string as getLocation(), just without http://). So, here is my awk code for getting the HTML content.
awk '/<html>/{print $0}' source.txt
will give you the actual content of the HTML reply. (Of course, I assumed there are no \n characters in the content, but this can be easily extended.)
To integrate this into your code, just do the following:
system("command >> op.txt");
where command refers to the two awk commands I wrote previously. You can then read the output from the file op.txt. 30 lines of code reduced to just 2 lines (+ some code to read op.txt). I hope this helps. :) :)