patx/mrhttp-asgi
Avx2 for path parse
Commit d547b9a · Mark Reed · 2024-03-13T22:19:17-07:00
Comments
No comments yet.
Diff
diff --git a/bench/sanic/readme b/bench/sanic/readme
index 0c84d7f..224ae28 100644
--- a/bench/sanic/readme
+++ b/bench/sanic/readme
@@ -1,5 +1,4 @@
-pip install sanic
-pip install sanic_session
+pip install sanic sanic_session
wrk -t 4 -c 32 -d 2 http://localhost:8080/
diff --git a/dotests.py b/dotests.py
index a7dea80..a8a88f3 100644
--- a/dotests.py
+++ b/dotests.py
@@ -144,7 +144,7 @@ try:
opts = ('-H','XX-Real-IP: 1.2.3.4')
#print ("get ip ", run_wrk(loop,'http://localhost:8080/getip',options=opts), "Requests/second" )
- #print ("many num args ", run_wrk(loop, 'http://localhost:8080/sixargs/155/2001/29999/25/29999543/93243242394'), "Requests/second" )
+ print ("many num args ", run_wrk(loop, 'http://localhost:8080/sixargs/155/2001/29999/25/29999543/93243242394'), "Requests/second" )
#print ("404 ", run_wrk(loop, 'http://localhost:8080/404/'), "Requests/second" )
# Grab the stdout for debug
diff --git a/gbench/parse.cpp b/gbench/parse.cpp
index 7a446be..46f776f 100644
--- a/gbench/parse.cpp
+++ b/gbench/parse.cpp
@@ -20,6 +20,13 @@
#define IS_PRINTABLE_ASCII(c) ((unsigned char)(c)-040u < 0137u)
+#define CHECK_END() /* expects buf, buf_end and ret to be in scope where the macro is expanded */ \
+ if (buf == buf_end) { /* the cursor has reached the end of the range being scanned */ \
+ *ret = -2; \
+ return NULL; /* returns from the function that expanded the macro */ \
+ }
+
+
#define CHECK_EOF() \
if (buf == buf_end) { \
*ret = -2; \
@@ -36,6 +43,7 @@
CHECK_EOF(); \
EXPECT_CHAR_NO_CHECK(ch);
+
// Table for converting to lower case
#define TOLC(c) __lct[(unsigned char)c]
static const unsigned char __lct[] __attribute__((aligned(64))) = {
@@ -133,6 +141,33 @@ static const char *findchar(const char *buf, const char *buf_end, const char *ra
}
return buf;
}
+// Benchmark port of the parser's ADVANCE_TOKEN macro: scans the token that
+// starts at `buf` and must be terminated by a space. Only the 512 bytes
+// following `buf` are examined.
+//
+// On success returns a pointer to the terminating space and stores the token
+// length in *ret. On failure returns NULL with *ret = -1 (invalid byte before
+// the space) or *ret = -2 (end of the 512-byte window reached without a space).
+static const char *adv_token(const char *buf, int *ret) {
+  const char *tok_start = buf;
+  const char *buf_end = buf + 512;
+  // Passed to findchar() as the set of bytes to stop at (0x00-0x22 and 0x7f).
+  static const char ranges2[] = "\000\042\177\177";
+  int found2;
+
+  buf = findchar(buf, buf_end, ranges2, sizeof(ranges2) - 1, &found2);
+  if (!found2) {
+    CHECK_END();
+  } else if (unlikely(*buf != ' ')) {
+    // findchar() stopped on something other than the terminating space.
+    *ret = -1;
+    return NULL;
+  }
+  // Finish byte by byte until the space, rejecting control characters and DEL.
+  while (*buf != ' ') {
+    if (unlikely(!IS_PRINTABLE_ASCII(*buf))) {
+      if ((unsigned char)*buf < '\040' || *buf == '\177') {
+        *ret = -1;
+        return NULL;
+      }
+    }
+    ++buf;
+    CHECK_END();
+  }
+  // Previously the function returned from inside the loop, which left the
+  // length assignment unreachable; the length is now always reported.
+  *ret = (int)(buf - tok_start);
+  return buf;
+}
static const char *get_token_to_eol(const char *buf, const char *buf_end, int *ret)
{
@@ -169,6 +204,60 @@ FOUND_CTL:
return buf;
}
+static const char *my_get_eol128(const char *buf) { // 128-bit scan for '\r': returns the address 2 bytes past the first '\r' at or after buf
+ //__m128i* pSrc1 = (__m128i *)string; // init pointer to start of string
+ __m128i m0 = _mm_set1_epi8(13); // vector of 16 '\r' (ASCII 13) characters
+ // NOTE(review): no end pointer is taken, so the loop only stops once a '\r' is seen; the caller must guarantee one exists
+ while (1)
+ {
+ __m128i v0 = _mm_loadu_si128((const __m128i *)buf); // unaligned load of the next 16 bytes
+ __m128i v1 = _mm_cmpeq_epi8(v0, m0); // compare all 16 chars against '\r'
+ unsigned int vmask = _mm_movemask_epi8(v1); // get 16 comparison result bits
+ if (vmask != 0) {
+ buf += TZCNT(vmask) + 2; // offset of the first match, then +2 to step over it and the following byte (presumably the '\n' of CRLF)
+ break; // we found a '\r', break out of loop
+ }
+ buf += 16; // no '\r' in this chunk: advance to the next 16 bytes
+ }
+ return buf;
+}
+
+ // 256-bit AVX2 vectors hold 32 bytes (32 * 8 bits); the constants below are compared 32 bytes at a time
+__m256i m13 = _mm256_set1_epi8(13);
+__m256i m32 = _mm256_set1_epi8(32);
+static const char *my_get_eol(const char *buf) { // 256-bit scan for '\r': returns the address 2 bytes past the first '\r' at or after buf; never returns NULL
+ // NOTE(review): no end pointer is taken, so the loop only stops once a '\r' is seen; the caller must guarantee one exists
+ while (1)
+ {
+ __m256i v0 = _mm256_loadu_si256((const __m256i *)buf); // unaligned load of the next 32 bytes
+ __m256i v1 = _mm256_cmpeq_epi8(v0, m13); // compare all 32 bytes against m13 (file-scope vector of 13, i.e. '\r')
+ unsigned long vmask = _mm256_movemask_epi8(v1); // one comparison result bit per byte
+ if (vmask != 0) {
+ buf += TZCNT(vmask) + 2; // offset of the first match, then +2 to step over it and the following byte (presumably the '\n' of CRLF)
+ break;
+ }
+ buf += 32; // no '\r' in this chunk: advance to the next 32 bytes
+ }
+ return buf;
+}
+static const char *get_to_space(const char *buf, int *len) { // 256-bit scan for ' ': returns the address just past the first space and stores the byte count before that space in *len
+ const char *orig = buf; // start of the token, kept to compute the length
+ while (1) // NOTE(review): no end pointer is taken, so the loop only stops once a space is seen; the caller must guarantee one exists
+ {
+ __m256i v0 = _mm256_loadu_si256((const __m256i *)buf); // unaligned load of the next 32 bytes
+ __m256i v1 = _mm256_cmpeq_epi8(v0, m32); // compare all 32 bytes against m32 (file-scope vector of 32, i.e. ' ')
+ unsigned long vmask = _mm256_movemask_epi8(v1); // one comparison result bit per byte
+ if (vmask != 0) {
+ buf += TZCNT(vmask) + 1; // offset of the first space, then +1 to step past it
+ break;
+ }
+ buf += 32; // no space in this chunk: advance to the next 32 bytes
+ }
+ *len = buf-orig-1; // buf is one past the space, so subtract 1 to exclude the space itself
+ return buf;
+}
+
+
static const char *parse_headers(const char *buf, const char *buf_end, int *ret)
{
@@ -227,7 +316,8 @@ static const char *parse_headers(const char *buf, const char *buf_end, int *ret)
//if ( buf[0] == 'a' && buf[13] == 'r' ) { //"application/mrpacker"
//mrr->flags = 2;
//}
- buf = get_token_to_eol(buf, buf_end, ret);
+ //buf = get_token_to_eol(buf, buf_end, ret);
+ buf = my_get_eol(buf);
goto skipvalue;
}
if ( buf[13] == ':' ) { // Cache-Control:
@@ -247,7 +337,8 @@ static const char *parse_headers(const char *buf, const char *buf_end, int *ret)
//headers[*num_headers].name_len = 16;
buf += 18;
//mrr->ip = buf;
- buf = get_token_to_eol(buf, buf_end, ret);
+ //buf = get_token_to_eol(buf, buf_end, ret);
+ buf = my_get_eol(buf);
//mrr->ip_len = headers[*num_headers].value_len;
goto skipvalue;
}
@@ -274,7 +365,8 @@ static const char *parse_headers(const char *buf, const char *buf_end, int *ret)
//headers[*num_headers].name_len = 9;
buf += 11;
//mrr->ip = buf;
- buf = get_token_to_eol(buf, buf_end, ret);
+ //buf = get_token_to_eol(buf, buf_end, ret);
+ buf = my_get_eol(buf);
//mrr->ip_len = headers[*num_headers].value_len;
goto skipvalue;
}
@@ -441,7 +533,8 @@ static const char *parse_headers(const char *buf, const char *buf_end, int *ret)
//headers[*num_headers].name_len = 0;
}
hvalue:
- if ((buf = get_token_to_eol(buf, buf_end, ret)) == NULL) {
+ //if ((buf = get_token_to_eol(buf, buf_end, ret)) == NULL) {
+ if ((buf = my_get_eol(buf)) == NULL) {
return NULL;
}
skipvalue:
@@ -661,42 +754,6 @@ wedone:
}
-static const char *my_get_eol128(const char *buf) {
- //__m128i* pSrc1 = (__m128i *)string; // init pointer to start of string
- __m128i m0 = _mm_set1_epi8(13); // vector of 16 `\0` characters
-
- while (1)
- {
- __m128i v0 = _mm_loadu_si128((const __m128i *)buf);
- __m128i v1 = _mm_cmpeq_epi8(v0, m0); // compare all 16 chars
- unsigned int vmask = _mm_movemask_epi8(v1); // get 16 comparison result bits
- if (vmask != 0) {
- buf += TZCNT(vmask) + 2;
- break; // we found a `\0`, break out of loop
- }
- buf += 16; //pSrc1++; // next 16 characters...
- }
- return buf;
-}
-
- //64bits 256bits bytes 8 * 32
-__m256i m13 = _mm256_set1_epi8(13);
-
-static const char *my_get_eol(const char *buf) {
-
- while (1)
- {
- __m256i v0 = _mm256_loadu_si256((const __m256i *)buf);
- __m256i v1 = _mm256_cmpeq_epi8(v0, m13);
- unsigned long vmask = _mm256_movemask_epi8(v1);
- if (vmask != 0) {
- buf += TZCNT(vmask) + 2;
- break;
- }
- buf += 32; //pSrc1++;
- }
- return buf;
-}
//__m256i m13 = _mm256_set1_epi8(13);
__m256i m58 = _mm256_set1_epi8(58); // 32 bytes of 58 (0x3A, ':'), i.e. 0x3A3A3A3A...
@@ -948,10 +1005,18 @@ static void parse_mysse4( const char* buf ) {
static char buf[8096] = "Host: server\r\n"
"User-Agent: Mozilla/5.0 (X11; Linux x86_64) Gecko/20130501 Firefox/30.0 AppleWebKit/600.00 Chrome/30.0.0000.0 Trident/10.0 Safari/600.00\r\n"
"Cookie: uid=12345678901234567890; __utma=1.1234567890.1234567890.1234567890.1234567890.12; wd=2560x1600\r\n"
+"Cookie: uid=12345678901234567890; __utma=1.1234567890.1234567890.1234567890.1234567890.12; wd=2560x1600\r\n"
+"Cookie: uid=12345678901234567890; __utma=1.1234567890.1234567890.1234567890.1234567890.12; wd=2560x1600\r\n"
+"Cookie: uid=12345678901234567890; __utma=1.1234567890.1234567890.1234567890.1234567890.12; wd=2560x1600\r\n"
+"Cookie: uid=12345678901234567890; __utma=1.1234567890.1234567890.1234567890.1234567890.12; wd=2560x1600\r\n"
+"Cookie: uid=12345678901234567890; __utma=1.1234567890.1234567890.1234567890.1234567890.12; wd=2560x1600\r\n"
+"Cookie: uid=12345678901234567890; __utma=1.1234567890.1234567890.1234567890.1234567890.12; wd=2560x1600\r\n"
+"Cookie: uid=12345678901234567890; __utma=1.1234567890.1234567890.1234567890.1234567890.12; wd=2560x1600\r\n"
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,* /*;q=0.8\r\n"
"Accept-Language: en-US,en;q=0.5\r\n"
"Connection: keep-alive\r\n\r\n";
static char buf2[8096] = "Host: localhost:8080\r\nUser-Agent: python-requests/2.31.0\r\nAccept-Encoding: gzip, deflate\r\nAccept: * /*\r\nConnection: keep-alive\r\nCookie: foo=b=ar\r\nContent-Length: 0\r\n\r\n";
+static char path[8096] = "/foo/bar/bazfdasfffffffffffffffffffffffffffffffffffffffdfffffffffffffffffffffffffffffffffffffffffffffffffff ";
static void BM_SlowParse(benchmark::State& state) {
// Perform setup here
@@ -983,46 +1048,51 @@ static void BM_my_get_eol(benchmark::State& state) {
}
static void BM_my_header_parse(benchmark::State& state) {
- // Perform setup here
for (auto _ : state) {
- // This code gets timed
parse_mine(buf);
}
}
static void BM_my2_header_parse(benchmark::State& state) {
- // Perform setup here
for (auto _ : state) {
- // This code gets timed
parse_mine2(buf);
}
}
static void BM_my3_header_parse(benchmark::State& state) {
- // Perform setup here
for (auto _ : state) {
- // This code gets timed
parse_mine3(buf);
}
}
static void BM_old_header_parse(benchmark::State& state) {
- // Perform setup here
int ret = 0;
for (auto _ : state) {
- // This code gets timed
parse_headers(buf,buf+2048,&ret);
}
}
static void BM_avx2_header_parse(benchmark::State& state) {
- // Perform setup here
int ret = 0;
for (auto _ : state) {
- // This code gets timed
parse_headers_avx2(buf,buf+2048,&ret);
}
}
+// Benchmarks adv_token() (findchar + byte loop) on the static `path` buffer.
+static void BM_adv_token(benchmark::State& state) {
+  int path_len = 0;
+  for (auto _ : state) {
+    const char *end = adv_token(path, &path_len);
+    // Keep both results observable so the timed call cannot be hoisted or elided.
+    benchmark::DoNotOptimize(end);
+    benchmark::DoNotOptimize(path_len);
+  }
+}
+// Benchmarks get_to_space() (AVX2 space scan) on the static `path` buffer.
+static void BM_adv_token_avx2(benchmark::State& state) {
+  int path_len = 0;
+  for (auto _ : state) {
+    const char *end = get_to_space(path, &path_len);
+    // Keep both results observable so the timed call cannot be hoisted or elided.
+    benchmark::DoNotOptimize(end);
+    benchmark::DoNotOptimize(path_len);
+  }
+}
+
@@ -1030,10 +1100,12 @@ static void BM_avx2_header_parse(benchmark::State& state) {
//BENCHMARK(BM_sse4_get_eol);
//BENCHMARK(BM_my_get_eol);
BENCHMARK(BM_my3_header_parse);
-BENCHMARK(BM_my2_header_parse);
+//BENCHMARK(BM_my2_header_parse);
//BENCHMARK(BM_my_header_parse);
BENCHMARK(BM_old_header_parse);
-BENCHMARK(BM_avx2_header_parse);
+//BENCHMARK(BM_avx2_header_parse);
+BENCHMARK(BM_adv_token);
+BENCHMARK(BM_adv_token_avx2);
BENCHMARK_MAIN();
/*
diff --git a/src/mrhttp/internals/mrhttpparser.c b/src/mrhttp/internals/mrhttpparser.c
index f10f3d2..7cb867c 100644
--- a/src/mrhttp/internals/mrhttpparser.c
+++ b/src/mrhttp/internals/mrhttpparser.c
@@ -73,35 +73,6 @@ static void print_buffer( char* b, int len ) {
CHECK_END(); \
EXPECT_CHAR_NO_CHECK(ch);
-#define ADVANCE_TOKEN(tok, toklen) \
- do { \
- const char *tok_start = buf; \
- static const char ALIGNED(16) ranges2[] = "\000\042\177\177"; \
- int found2; \
- buf = findchar(buf, buf_end, ranges2, sizeof(ranges2) - 1, &found2); \
- if (!found2) { \
- CHECK_END(); \
- } else if ( unlikely(*buf != ' ' )) { \
- *ret = -1; \
- return NULL; \
- } \
- while (1) { \
- if (*buf == ' ') { \
- break; \
- } else if (unlikely(!IS_PRINTABLE_ASCII(*buf))) { \
- if ((unsigned char)*buf < '\040' || *buf == '\177') { \
- *ret = -1; \
- return NULL; \
- } \
- } \
- ++buf; \
- CHECK_END(); \
- } \
- tok = tok_start; \
- toklen = buf - tok_start; \
- } while (0)
-
-
static const char *token_char_map = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
"\0\1\0\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0"
"\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1"
@@ -207,60 +178,30 @@ FOUND_CTL:
return buf;
}
-static const char *is_complete(const char *buf, const char *buf_end, size_t last_len, int *ret)
-{
- int ret_cnt = 0;
- buf = last_len < 3 ? buf : buf + last_len - 3;
-
- while (1) {
- CHECK_END();
- if (*buf == '\015') {
- ++buf;
- CHECK_END();
- EXPECT_CHAR('\012');
- ++ret_cnt;
- } else if (*buf == '\012') {
- ++buf;
- ++ret_cnt;
- } else {
- ++buf;
- ret_cnt = 0;
- }
- if (ret_cnt == 2) {
- return buf;
- }
- }
-
- *ret = -2;
- return NULL;
-}
-
-#define PARSE_INT(valp_, mul_) \
- if (*buf < '0' || '9' < *buf) { \
- buf++; \
- *ret = -1; \
- return NULL; \
- } \
- *(valp_) = (mul_) * (*buf++ - '0');
-
-#define PARSE_INT_3(valp_) \
- do { \
- int res_ = 0; \
- PARSE_INT(&res_, 100) \
- *valp_ = res_; \
- PARSE_INT(&res_, 10) \
- *valp_ += res_; \
- PARSE_INT(&res_, 1) \
- *valp_ += res_; \
- } while (0)
-
-
#ifdef __AVX2__
// Returns the number of trailing zero bits in `in` (the index of its lowest
// set bit) by executing the x86 `tzcnt` instruction through inline assembly.
// NOTE(review): `res` is unsigned long while `in` is unsigned long long, so the
// two register operands only have the same width where unsigned long is
// 64 bits - confirm for every targeted ABI.
static unsigned long TZCNT(unsigned long long in) {
unsigned long res;
asm("tzcnt %1, %0\n\t" : "=r"(res) : "r"(in)); // %0 = res (output register), %1 = in (input register)
return res;
}
+// Returns the number of bytes between `buf` and the first space (0x20) inside
+// [buf, buf_end), or -1 when that range contains no space.
+//
+// Whole 32-byte chunks are compared with AVX2; the remaining (< 32 byte) tail
+// is checked byte by byte. This way no load reaches past buf_end, and a space
+// lying beyond the received data (e.g. stale bytes from an earlier request)
+// is never reported as the end of the token.
+static int get_len_to_space(const char *buf, const char *buf_end) {
+  const char *orig = buf;
+  const __m256i m32 = _mm256_set1_epi8(32); // 32 copies of ' '
+
+  while (buf_end - buf >= 32) {
+    __m256i v0 = _mm256_loadu_si256((const __m256i *)buf);
+    __m256i v1 = _mm256_cmpeq_epi8(v0, m32);
+    // One bit per byte: bit i is set when buf[i] == ' '.
+    unsigned int vmask = (unsigned int)_mm256_movemask_epi8(v1);
+    if (vmask != 0) {
+      return (int)((buf + TZCNT(vmask)) - orig);
+    }
+    buf += 32;
+  }
+  // Scalar tail: fewer than 32 bytes remain, so a vector load would over-read.
+  for (; buf < buf_end; ++buf) {
+    if (*buf == ' ') {
+      return (int)(buf - orig);
+    }
+  }
+  return -1;
+}
+
+
static const char *parse_headers_avx2(const char *buf, const char *buf_end, struct mr_header *headers, size_t *num_headers,
size_t max_headers, int *ret, struct mr_request *mrr)
{
@@ -285,6 +226,7 @@ static const char *parse_headers_avx2(const char *buf, const char *buf_end, stru
av_new512:
i = 0;
buf = obuf;
+ if ( buf >= buf_end ) { *ret = -1; return NULL; }
b0 = _mm256_loadu_si256((const __m256i *) (buf + 32*0)); // buf[0]
b1 = _mm256_loadu_si256((const __m256i *) (buf + 32*1)); // buf[32]
@@ -338,7 +280,7 @@ av_new512:
headers[*num_headers].value = sbuf;
headers[*num_headers].value_len = buf-sbuf;
++*num_headers;
- if (*num_headers >= max_headers) { printf("DELME hdr too many\n"); *ret = -1; return NULL; }
+ if (*num_headers >= max_headers) { *ret = -1; return NULL; }
name_or_value = 0;
buf += 2; if ( *buf == '\r' ) { goto av_done; } // \r\n\r\n marks the end
} else {
@@ -362,6 +304,7 @@ av_new512:
obuf += 512;
goto av_new512;
+
av_done:
buf += 2;
*ret = 0;
@@ -721,7 +664,6 @@ static const char *parse_request(const char *buf, const char *buf_end, const cha
}
// parse request line
- //ADVANCE_TOKEN(*method, *method_len);
// TODO Support other methods
switch (*(unsigned int *)buf) {
case CHAR4_TO_INT('G', 'E', 'T', ' '):
@@ -729,10 +671,16 @@ static const char *parse_request(const char *buf, const char *buf_end, const cha
case CHAR4_TO_INT('P', 'O', 'S', 'T'):
*method = buf; *method_len = 4; buf += 5; break;
default:
- *ret = -2;
+ *ret = -1;
return NULL;
}
- ADVANCE_TOKEN(*path, *path_len);
+ *path = buf;
+ int l = get_len_to_space(buf, buf_end);
+ if ( l == -1 ) {
+ *ret = -1; // TODO Should we return -2 (needs more bytes?)
+ return NULL;
+ }
+ buf += l; *path_len = l;
++buf;
switch (*(unsigned long *)buf) {
case CHAR8_TO_LONG('H', 'T', 'T', 'P','/','1','.','0'):
@@ -768,7 +716,7 @@ static __inline__ unsigned long long rdtsc(void)
}
int mr_parse_request(const char *buf_start, size_t len, const char **method, size_t *method_len, const char **path,
- size_t *path_len, int *minor_version, struct mr_header *headers, size_t *num_headers, size_t last_len,struct mr_request *mrr)
+ size_t *path_len, int *minor_version, struct mr_header *headers, size_t *num_headers, struct mr_request *mrr)
{
const char *buf = buf_start, *buf_end = buf_start + len;
size_t max_headers = *num_headers;
@@ -783,12 +731,6 @@ int mr_parse_request(const char *buf_start, size_t len, const char **method, siz
*minor_version = -1;
*num_headers = 0;
- /* if last_len != 0, check if the request is complete (a fast countermeasure
- againt slowloris */
- if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
- return r;
- }
-
if ((buf = parse_request(buf, buf_end, method, method_len, path, path_len, minor_version, headers, num_headers, max_headers, &r, mrr)) == NULL) {
return r;
}
@@ -801,4 +743,3 @@ int mr_parse_request(const char *buf_start, size_t len, const char **method, siz
#undef CHECK_END
#undef EXPECT_CHAR
-#undef ADVANCE_TOKEN
diff --git a/src/mrhttp/internals/mrhttpparser.h b/src/mrhttp/internals/mrhttpparser.h
index ffd037e..ed20dfa 100644
--- a/src/mrhttp/internals/mrhttpparser.h
+++ b/src/mrhttp/internals/mrhttpparser.h
@@ -93,7 +93,7 @@ struct mr_request {
// These functions return -2 if partial request, -1 if parsing failed, and the number of bytes parsed otherwise
int mr_parse_request(const char *buf, size_t len, const char **method, size_t *method_len, const char **path, size_t *path_len,
- int *minor_version, struct mr_header *headers, size_t *num_headers, size_t last_len, struct mr_request *mrr);
+ int *minor_version, struct mr_header *headers, size_t *num_headers, struct mr_request *mrr);
struct mr_chunked_decoder {
size_t bytes_left_in_chunk;
diff --git a/src/mrhttp/internals/parser.c b/src/mrhttp/internals/parser.c
index afff786..6e7a6b0 100644
--- a/src/mrhttp/internals/parser.c
+++ b/src/mrhttp/internals/parser.c
@@ -81,12 +81,12 @@ parse_headers:
char *method, *path;
int rc, minor_version;
//struct phr_header headers[100];
- size_t prevbuflen = 0, method_len, path_len;//, num_headers;
+ size_t method_len, path_len;//, num_headers;
request->num_headers = 100; // Max allowed headers
DBG_PARSER printf("before parser requests\n");
request->hreq.flags = 0; // TODO clear the mr_request struct
- rc = mr_parse_request(self->start, self->end-self->start, (const char**)&method, &method_len, (const char**)&path, &path_len, &minor_version, request->headers, &(request->num_headers), prevbuflen, &(request->hreq));
+ rc = mr_parse_request(self->start, self->end-self->start, (const char**)&method, &method_len, (const char**)&path, &path_len, &minor_version, request->headers, &(request->num_headers), &(request->hreq));
DBG_PARSER printf("parser requests rc %d\n",rc);
if ( rc < 0 ) return rc; // -2 incomplete, -1 error otherwise byte len of headers
diff --git a/tst.py b/tst.py
index 2dbdaf5..7c36784 100755
--- a/tst.py
+++ b/tst.py
@@ -21,6 +21,10 @@ async def index(r):
#x = r.form
#return x["param2"]
+@app.route('/123456789123456789')
+async def long(r):
+ return "long"
+
@app.route('/json')
def json(r):
return r.json["name"]