Frage

I have the following match pattern in C code:

static const char my_pattern[] = "([0-9]{20})[\\^]";

and the following string:

12345678901234567890^otherstrings^and^digits12345678901234567890

I want to match only:

12345678901234567890

Why is my result:

12345678901234567890^

?

Thank you!

EDIT:

Full code example:

#include <stdio.h>
#include <string.h>
#include <pcre.h>

/* Regex source text: twenty decimal digits in a capture group, then a literal '^'. */
static const char my_pattern[] = "([0-9]{20})[\\^]";

/* Compiled form of my_pattern; assigned in main(). Static storage starts out null. */
static pcre *my_pcre;

/* Result of pcre_study() on my_pcre; assigned in main(). Static storage starts out null. */
static pcre_extra *my_pcre_extra;

/*
 * Run the compiled pattern held in my_pcre against src and print the outcome.
 *
 * Output on stdout: the subject, then either "None match." or the value
 * returned by pcre_exec() followed by substring number 0 (the text matched
 * by the entire pattern). Failures other than "no match" are reported on
 * stderr instead of being treated as a match.
 *
 * src must be a NUL-terminated string.
 */
void my_match(const char* src)
{
    printf("src: %s\n", src);

    int ovector[30]={0};
    const int ovector_len = (int)(sizeof ovector / sizeof ovector[0]);

    int retex = pcre_exec(my_pcre, NULL, src, (int)strlen(src), 0, 0, ovector, ovector_len);
    if (retex == PCRE_ERROR_NOMATCH){
        printf("None match.\n");
        return;
    }
    if (retex < 0){
        /* Every other negative value is an error code, not a match count. */
        fprintf(stderr, "pcre_exec failed with code %d\n", retex);
        return;
    }

    printf("Matches %d\n", retex);

    /*
     * A return of 0 means the match succeeded but ovector was too small to
     * hold every group; in that case the usable count is ovector_len / 3.
     */
    int count = (retex == 0) ? ovector_len / 3 : retex;

    const char *sp = NULL;
    int len = pcre_get_substring(src, ovector, count, 0, &sp);
    if (len < 0){
        /* sp was not allocated; printing it with %s would be undefined. */
        fprintf(stderr, "pcre_get_substring failed with code %d\n", len);
        return;
    }
    printf("%s\n", sp);
    pcre_free_substring(sp);
}

/*
 * Compile and study my_pattern, then run my_match() on a sample subject.
 *
 * Returns 0 on success and 1 if the pattern cannot be compiled. The compiled
 * pattern and study data are not freed explicitly; they live until the
 * process exits.
 */
int main()
{
    const char* err = NULL;
    int erroffset = 0;

    my_pcre = pcre_compile(my_pattern, PCRE_CASELESS, &err, &erroffset, NULL);
    if (my_pcre == NULL){
        /* Without a compiled pattern there is nothing to match against. */
        fprintf(stderr, "pcre_compile failed at offset %d: %s\n",
                erroffset, err ? err : "(no message)");
        return 1;
    }

    /*
     * pcre_study() may return NULL even on success (nothing useful found);
     * an actual failure is signalled by a non-NULL error message.
     */
    my_pcre_extra = pcre_study(my_pcre, 0, &err);
    if (err != NULL){
        fprintf(stderr, "pcre_study failed: %s\n", err);
    }

    my_match("12345678901234567890^otherstrings^and^digits12345678901234567890");
    return 0;
}
War es hilfreich?

Lösung

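PCRE stores captures in the output vector as pairs of offsets. The full match (group 0) is in the first pair and the sub-group capture (group 1) is in the second. Your code asks pcre_get_substring for string number 0, i.e. the full match including the ^; ask for string number 1 to get only the digits. See below: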

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <pcre.h>

/* Sample text that the pattern is matched against. */
static const char subject[] = "12345678901234567890^otherstrings^and^digits12345678901234567890";

/* Number of ints in the pcre_exec() output vector. */
enum { OVSIZE = 9 };

/*
 * Compile the pattern, match it against subject, and print every captured
 * group as "group N: text". Group 0 is the whole match; group 1 is the
 * parenthesised twenty-digit run.
 *
 * Returns EXIT_FAILURE if the pattern does not compile, 0 otherwise. The
 * compiled pattern is not freed explicitly; it lives until the process exits.
 */
int main()
{
    static const char my_pattern[] = "([0-9]{20})[\\^]";
    const char *errtext = NULL;
    int errofs = 0;

    pcre* re = pcre_compile(my_pattern, 0, &errtext, &errofs, NULL);
    if (re == NULL)
    {
        fprintf(stderr, "pcre_compile failed at offset %d: %s\n",
                errofs, errtext ? errtext : "(no message)");
        return EXIT_FAILURE;
    }

    int ov[OVSIZE];
    /* sizeof(subject) counts the terminating NUL, which is not part of the text. */
    int rc = pcre_exec(re, NULL, subject, (int)(sizeof(subject) - 1), 0, 0, ov, OVSIZE);
    if (rc < 0)
    {
        fprintf(stderr, "pcre_exec returned %d (no match or error)\n", rc);
    }

    for (int i=0; i<rc; ++i)
    {
        /* A group that did not take part in the match has offsets of -1. */
        if (ov[2*i] < 0)
            continue;
        printf("group %d: %.*s\n", i, ov[2*i+1]-ov[2*i], subject+ov[2*i]);
    }

    return 0;
}

Output

group 0: 12345678901234567890^
group 1: 12345678901234567890

Andere Tipps

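This is the regex pattern I was looking for (written as it appears inside a C string literal, hence the doubled backslash). The lookahead (?=...) requires the ^ to follow but does not include it in the match: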

([0-9]{20})(?=\\^)

or, more elegant:

([\\d]{20})(?=\\^)
Lizenziert unter: CC-BY-SA mit Zuschreibung
Nicht verbunden mit StackOverflow
scroll top