patx/mrhttp-asgi

Performance improvements

Commit 2fadf3d · Mark Reed · 2024-03-15T13:36:27-07:00

Changeset
2fadf3d174c366b6ad5cb73f671dd3efcf9d3e62
Parents
d547b9a5ad813d457b9c7129d1f903fde097837f

View source at this commit

Comments

No comments yet.

Log in to comment

Diff

diff --git a/.gitignore b/.gitignore
index e5f6ecb..9582357 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,5 +11,6 @@ build/*
 gbench/parse
 gbench/t
 gbench/string
+gbench/query
 .DS_Store
 __pycache__
diff --git a/README.md b/README.md
index 59b5ced..45b3eeb 100644
--- a/README.md
+++ b/README.md
@@ -4,29 +4,38 @@ Async Python 3.5+ web server written in C
 # Benchmarks
 
 ```
-Hello pipelined  4,152,858 Requests/second
-Hello              633,097 Requests/second
-404                654,053 Requests/second
-Cookies            422,728 Requests/second
-Form parsing       328,780 Requests/second
-Parse JSON         224,872 Requests/second
-Templates          257,753 Requests/second
-Sessions:
-  memcached        163,833 Requests/second
-  mrcache          283,359 Requests/second
-MrWorkServer       338,891 Requests/second
-File Upload        132,242 Requests/second
+  Pipelined
+    Hello           6834994.51 Requests/second
+    More hdrs       6193307.49 Requests/second
+    Sessions        4396364.13 Requests/second
+    File Upload     3510289.14 Requests/second
+    mrpacker        2052674.93 Requests/second
+    Form            1182228.98 Requests/second
+
+  One by one
+    Hello           707667.74 Requests/second
+    Hello hdrs      728639.36 Requests/second
+    Cookies         588212.04 Requests/second
+    many args       691910.28 Requests/second
+    404 natural     763643.3 Requests/second
+    404             580424.69 Requests/second
+    Form parsing    338553.65 Requests/second
+    mrpacker        533242.09 Requests/second
+    Sessions        325354.58 Requests/second
+    File Upload     292331.03 Requests/second
+    get ip          503454.35 Requests/second
+    
 ```
 
 Versus sanic a pure python async server
 
 ```
-Hello World       64,366 Requests/second
-Cookies           50,867 Requests/second
-404                9,256 Requests/second
-forms             27,104 Requests/second
+Hello World       22,366 Requests/second
+Cookies           20,867 Requests/second
+404                8,256 Requests/second
+forms             11,104 Requests/second
 sessions           4,053 Requests/second
-File upload       21,457 Requests/second
+File upload        1,457 Requests/second
 ```
 
 Hello World Example
diff --git a/bench/sanic/session.py b/bench/sanic/session.py
new file mode 100644
index 0000000..6105421
--- /dev/null
+++ b/bench/sanic/session.py
@@ -0,0 +1,30 @@
+
+import aiomcache
+import uvloop
+
+from sanic import Sanic
+from sanic.response import text
+from sanic_session import Session, MemcacheSessionInterface
+
+app = Sanic("app")
+
+# create a memcache client
+client = aiomcache.Client("127.0.0.1", 11211)
+
+# pass the memcache client into the session
+session = Session(app, interface=MemcacheSessionInterface(client))
+
+@app.route("/")
+async def test(request):
+    # interact with the session like a normal dict
+    if not request.ctx.session.get('foo'):
+        request.ctx.session['foo'] = 0
+
+    request.ctx.session['foo'] += 1
+
+    response = text("YAY")
+
+    return response
+
+if __name__ == "__main__":
+    app.run(host="0.0.0.0", port=8000, debug=True)
diff --git a/bench/sanic/tst.py b/bench/sanic/tst.py
index d07e503..f8ae93a 100644
--- a/bench/sanic/tst.py
+++ b/bench/sanic/tst.py
@@ -1,13 +1,15 @@
 
 import sanic
-from sanic.response import json
 
 app = sanic.Sanic("my-hello-world-app")
 
 @app.route('/')
 async def test(request):
-    return sanic.text("Hello World")
+  return sanic.text("Hello World")
+
+@app.route("/s")
+async def sess(request):
+  return sanic.text("session")
 
 if __name__ == '__main__':
     app.run(port=8080)
-
diff --git a/curl.sh b/curl.sh
index 38065e6..a9c7f15 100755
--- a/curl.sh
+++ b/curl.sh
@@ -1,5 +1,3 @@
-
-for run in {1..1000}
-do
-  curl -d "param1=value1&param2=value2" -X POST http://localhost:8080/ -H "Content-Type: application/x-www-form-urlencoded"
+for n in {1..10}; do
+  curl -d "param1=value1&param2=value2" -X POST http://localhost:8080/form -H "Content-Type: application/x-www-form-urlencoded"
 done
diff --git a/dotests.py b/dotests.py
index a8a88f3..e6bb305 100644
--- a/dotests.py
+++ b/dotests.py
@@ -1,6 +1,13 @@
 
 readme = """
-  pip install psutil requests msgpack mrasyncmc tenjin mrpacker
+  pip install psutil requests msgpack mrasyncmc tenjin mrpacker mrworkserver
+  mrcache and mrworkserver must be running for the benchmarks
+  mrcache:
+    git clone https://github.com/MarkReedZ/mrcache.git
+    cd mrcache; ./bld; ./mrcache
+  mrworkserver
+    python workserver.py    
+    
 """
 
 
@@ -11,12 +18,24 @@ import importlib
 
 import tests
 
+import argparse
+import sys
+import asyncio
+import os
+from asyncio.subprocess import PIPE, STDOUT
+import statistics
+
+import uvloop
+import psutil
+import atexit
+
 # TODO
-#  Check for memcached being up and add the session key so we hit and load the json 43709dd361cc443e976b05714581a7fb
-#     memcached -l 127.0.0.1 -p 11211 -d -m 50
+#  Check for mrworkserver and mrcache being up and add the session key so we hit and load the json 43709dd361cc443e976b05714581a7fb
+#     mrcache -m 64 -i 16
+#     python mrworkserver/tst.py
 
 
-if 1:
+async def run_tests():
   package = tests
   for importer, modname, ispkg in pkgutil.iter_modules(package.__path__):
     if modname.startswith("test"):
@@ -36,53 +55,33 @@ if 1:
         if f[0] == 'teardown':
           f[1]()
   
-print("Benchmarks")
     
-import argparse
-import sys
-import asyncio
-import os
-from asyncio.subprocess import PIPE, STDOUT
-import statistics
-
-import uvloop
-import psutil
-import atexit
 
 #from misc import cpu
 
 
-def run_wrk(loop, endpoint=None, lua=None, options=None):
+async def run_wrk(loop, endpoint=None, lua=None, options=None):
   rps = 0
   try: 
     endpoint = endpoint or 'http://localhost:8080/'
     if lua:
       if options != None:
-        wrk_fut = asyncio.create_subprocess_exec( 'wrk', '-t', '4', '-c', '32', '-d', '2', '-s', lua, *options, endpoint, stdout=PIPE, stderr=STDOUT)
+        proc = await asyncio.create_subprocess_exec( 'wrk', '-t', '4', '-c', '32', '-d', '2', '-s', lua, *options, endpoint, stdout=PIPE, stderr=STDOUT)
       else:
-        wrk_fut = asyncio.create_subprocess_exec( 'wrk', '-t', '4', '-c', '32', '-d', '2', '-s', lua, endpoint, stdout=PIPE, stderr=STDOUT)
+        proc = await asyncio.create_subprocess_exec( 'wrk', '-t', '4', '-c', '32', '-d', '2', '-s', lua, endpoint, stdout=PIPE, stderr=STDOUT)
     else:
       if options != None:
-        wrk_fut = asyncio.create_subprocess_exec( 'wrk', '-t', '4', '-c', '32', '-d', '2', *options, endpoint, stdout=PIPE, stderr=STDOUT)
+        proc = await asyncio.create_subprocess_exec( 'wrk', '-t', '4', '-c', '32', '-d', '2', *options, endpoint, stdout=PIPE, stderr=STDOUT)
       else:
-        wrk_fut = asyncio.create_subprocess_exec( 'wrk', '-t', '4', '-c', '32', '-d', '2', endpoint, stdout=PIPE, stderr=STDOUT)
+        proc = await asyncio.create_subprocess_exec( 'wrk', '-t', '4', '-c', '32', '-d', '2', endpoint, stdout=PIPE, stderr=STDOUT)
   
-    wrk = loop.run_until_complete(wrk_fut)
+    stdout, stderr = await proc.communicate()
     rps = 0
-    lines = []
-    while 1:
-      line = loop.run_until_complete(wrk.stdout.readline())
-      if line:
-        line = line.decode('utf-8')
-        lines.append(line)
-        if line.startswith('Requests/sec:'):
-          rps = float(line.split()[-1])
-      else:
-        break
+    lines = stdout.decode('utf-8').split("\n")
+    for line in lines:
+      if line.startswith('Requests/sec:'):
+        rps = float(line.split()[-1])
   
-    retcode = loop.run_until_complete(wrk.wait())
-    if retcode != 0:
-      print('\r\n'.join(lines))
   except Exception as e:
     print(e)
 
@@ -90,86 +89,76 @@ def run_wrk(loop, endpoint=None, lua=None, options=None):
   return rps
 
 
-noisy = ['atom', 'chrome', 'firefox', 'dropbox', 'opera', 'spotify', 'gnome-documents']
+async def run_benchmarks():
+  proc = await asyncio.create_subprocess_exec( 'python', 'tests/s_bench.py', stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE )
+  process = psutil.Process(proc.pid)
 
-def silence():
-  for proc in psutil.process_iter():
-    if proc.name() in noisy:
-      proc.suspend()
+  await asyncio.sleep(1)
 
-  def resume():
-    for proc in psutil.process_iter():
-      if proc.name() in noisy:
-        proc.resume()
-  atexit.register(resume)
+  if proc.returncode != None:
+    print("tests/s_bench.py failed to start:")
+    print(await proc.stdout.read())
+    print(await proc.stderr.read())
+    exit()
 
-silence()
+  print("Benchmarks")
 
-loop = uvloop.new_event_loop()
+  try:
+  
+    more_headers = ('-H','User-Agent: Mozilla/5.0 (X11; Linux x86_64) Gecko/20130501 Firefox/30.0 AppleWebKit/600.00 Chrome/30.0.0000.0 Trident/10.0 Safari/600.00',
+      '-H','Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+      '-H','Accept-Language: en-US,en;q=0.5',
+      '-H','Cookie: mrsession=43709dd361cc443e976b05714581a7fb; foo=fdsfdasdfasdfdsfasdfsdfsdfasdfas; short=fazc;',
+      '-H','Connection: keep-alive')
+    opts = ('-H','Cookie: mrsession=43709dd361cc443e976b05714581a7fb; foo=fdsfdasdfasdfdsfasdfsdfsdfasdfas; short=fazc;')
+
+    print("  Pipelined")
+    print ("    Hello          ", await run_wrk(loop, 'http://localhost:8080/',lua='tests/lua/pipeline.lua'), "Requests/second" )
+    print ("    More hdrs      ", await run_wrk(loop, 'http://localhost:8080/',options=more_headers,lua='tests/lua/pipeline.lua'), "Requests/second" )
+    print ("    Sessions       ", await run_wrk(loop, 'http://localhost:8080/s',lua='tests/lua/q-session.lua'), "Requests/second" )
+    print ("    File Upload    ", await run_wrk(loop, 'http://localhost:8080/upload',lua='tests/lua/q-upload.lua'), "Requests/second" )
+    print ("    mrpacker       ", await run_wrk(loop, 'http://localhost:8080/mrpacker',lua='tests/lua/q-mrp.lua'), "Requests/second" )
+    print ("    Form           ", await run_wrk(loop, 'http://localhost:8080/form',lua='tests/lua/q-form.lua'), "Requests/second" )
+    if 1:
+
+      print("")
+      print("  One by one")
+      print ("    Hello          ", await run_wrk(loop, 'http://localhost:8080/'),             "Requests/second" )
+      print ("    Hello hdrs     ", await run_wrk(loop, 'http://localhost:8080/', options=more_headers), "Requests/second" )
+      print ("    Cookies        ", await run_wrk(loop, 'http://localhost:8080/printCookies', options=opts), "Requests/second" )
+      print ("    many args      ", await run_wrk(loop, 'http://localhost:8080/sixargs/one/two/three/four/five/six'), "Requests/second" )
+      print ("    404 natural    ", await run_wrk(loop, 'http://localhost:8080/dfads404/'), "Requests/second" )
+      print ("    404            ", await run_wrk(loop, 'http://localhost:8080/404/'), "Requests/second" )
+      print ("    Form parsing   ", await run_wrk(loop, 'http://localhost:8080/form',lua='tests/lua/form.lua'), "Requests/second" )
+      #print ("   Templates      ", await run_wrk(loop, 'http://localhost:8080/template'),            "Requests/second" )
+      print ("    mrpacker       ", await run_wrk(loop,'http://localhost:8080/mrpacker',lua='tests/lua/mrpacker.lua'), "Requests/second" )
+      print ("    Sessions       ", await run_wrk(loop, 'http://localhost:8080/s',     options=opts), "Requests/second" )
+      print ("    File Upload    ", await run_wrk(loop,'http://localhost:8080/upload',lua='tests/lua/upload.lua'), "Requests/second" )
+      # Disabled in s_bench.py print ("Sessions (py)  ", run_wrk(loop, 'http://localhost:8080/pys',   options=opts), "Requests/second" )
+      #print ("    Session login  ", await run_wrk(loop, 'http://localhost:8080/login'),               "Requests/second" )
+      #print ("    json post      ", await run_wrk(loop,'http://localhost:8080/json',lua='tests/lua/json.lua'), "Requests/second" )
+      #print ("    mrpacker py    ", await run_wrk(loop,'http://localhost:8080/mrpackerpy',lua='tests/lua/mrpacker.lua'), "Requests/second" )
+      #print ("    msgpack py     ", await run_wrk(loop,'http://localhost:8080/msgpack',lua='tests/lua/msgpack.lua'), "Requests/second" )
+    
+      
+      opts = ('-H','XX-Real-IP: 1.2.3.4')
+      print ("    get ip         ", await run_wrk(loop,'http://localhost:8080/getip',options=opts), "Requests/second" )
+      #print ("many num args  ", await run_wrk(loop, 'http://localhost:8080/sixargs/155/2001/29999/25/29999543/93243242394'), "Requests/second" )
+      #print ("404            ", await run_wrk(loop, 'http://localhost:8080/404/'), "Requests/second" )
+  
 
-asyncio.set_event_loop(loop)
+  except KeyboardInterrupt:
+    pass
+  finally:
+    proc.terminate()
+    await proc.wait()
 
-server_fut = asyncio.create_subprocess_exec( 'python', 'tests/s_bench.py', stdout=asyncio.subprocess.PIPE )
-server = loop.run_until_complete(server_fut)
-process = psutil.Process(server.pid)
-
-time.sleep(1)
-try:
-
-  more_headers = ('-H','User-Agent: Mozilla/5.0 (X11; Linux x86_64) Gecko/20130501 Firefox/30.0 AppleWebKit/600.00 Chrome/30.0.0000.0 Trident/10.0 Safari/600.00',
-     '-H','Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
-     '-H','Accept-Language: en-US,en;q=0.5',
-     '-H','Connection: keep-alive')
-  opts = ('-H','Cookie: mrsession=43709dd361cc443e976b05714581a7fb; foo=fdsfdasdfasdfdsfasdfsdfsdfasdfas; short=fazc;')
-  if 1:
-    print ("Hello pipelined", run_wrk(loop, 'http://localhost:8080/',lua='tests/lua/pipeline.lua'), "Requests/second" )
-    print ("More hdrs pipelined", run_wrk(loop, 'http://localhost:8080/',options=more_headers,lua='tests/lua/pipeline.lua'), "Requests/second" )
-    #print ("Hello          ", run_wrk(loop, 'http://localhost:8080/'),             "Requests/second" )
-    #print ("Hello hdrs     ", run_wrk(loop, 'http://localhost:8080/', options=more_headers), "Requests/second" )
-
-    #print ("Cookies        ", run_wrk(loop, 'http://localhost:8080/printCookies', options=opts), "Requests/second" )
-    #print ("many args      ", run_wrk(loop, 'http://localhost:8080/sixargs/one/two/three/four/five/six'), "Requests/second" )
-    #print ("404 natural    ", run_wrk(loop, 'http://localhost:8080/dfads404/'), "Requests/second" )
-    #print ("404            ", run_wrk(loop, 'http://localhost:8080/404/'), "Requests/second" )
-    #print ("Form parsing   ", run_wrk(loop, 'http://localhost:8080/form',lua='tests/lua/form.lua'), "Requests/second" )
-    #print ("Templates      ", run_wrk(loop, 'http://localhost:8080/template'),            "Requests/second" )
-    #print ("mrpacker       ", run_wrk(loop,'http://localhost:8080/mrpacker',lua='tests/lua/mrpacker.lua'), "Requests/second" )
-    #print ("Sessions       ", run_wrk(loop, 'http://localhost:8080/s',     options=opts), "Requests/second" )
-    # Disabled in s_bench.py print ("Sessions (py)  ", run_wrk(loop, 'http://localhost:8080/pys',   options=opts), "Requests/second" )
-    #print ("Session login  ", run_wrk(loop, 'http://localhost:8080/login'),               "Requests/second" )
-    #print ("json post      ", run_wrk(loop,'http://localhost:8080/json',lua='tests/lua/json.lua'), "Requests/second" )
-    #print ("mrpacker py    ", run_wrk(loop,'http://localhost:8080/mrpackerpy',lua='tests/lua/mrpacker.lua'), "Requests/second" )
-    #print ("msgpack py     ", run_wrk(loop,'http://localhost:8080/msgpack',lua='tests/lua/msgpack.lua'), "Requests/second" )
+async def main():
+  print("main")  
+  await run_tests()
+  await run_benchmarks()
 
-  
-    opts = ('-H','XX-Real-IP: 1.2.3.4')
-    #print ("get ip         ", run_wrk(loop,'http://localhost:8080/getip',options=opts), "Requests/second" )
-    print ("many num args  ", run_wrk(loop, 'http://localhost:8080/sixargs/155/2001/29999/25/29999543/93243242394'), "Requests/second" )
-    #print ("404            ", run_wrk(loop, 'http://localhost:8080/404/'), "Requests/second" )
-
-  # Grab the stdout for debug 
-  if 0:
-    lines = []
-    x = 0
-    while 1:
-      x += 1
-      print(x)
-      #if x > 19842: break
-      if x > 21605: break
-      line = loop.run_until_complete(server.stdout.readline())
-      if line:
-        line = line.decode('utf-8')
-        lines.append(line)
-      else:
-        break
-    print ( len(lines) )
-    o = open( "wrkout", "wb" )
-    o.write( (''.join(lines)).encode("utf-8") )
-    o.close()
-
-except KeyboardInterrupt:
-  pass
-finally:
-  server.terminate()
-  loop.run_until_complete(server.wait())
+loop = uvloop.new_event_loop()
+asyncio.set_event_loop(loop)
+asyncio.run( main() )
 
diff --git a/dotests.py.old b/dotests.py.old
new file mode 100644
index 0000000..81a5e3b
--- /dev/null
+++ b/dotests.py.old
@@ -0,0 +1,189 @@
+
+readme = """
+  pip install psutil requests msgpack mrasyncmc tenjin mrpacker
+  mrcache and mrworkserver must be running for the benchmarks
+"""
+
+
+import pkgutil, time
+import inspect
+import types 
+import importlib
+
+import tests
+
+# TODO
+#  Check for mrworkserver and mrcache being up and add the session key so we hit and load the json 43709dd361cc443e976b05714581a7fb
+#     mrcache -m 64 -i 16
+#     python mrworkserver/tst.py
+
+
+
+if 1:
+  package = tests
+  for importer, modname, ispkg in pkgutil.iter_modules(package.__path__):
+    if modname.startswith("test"):
+      m = importlib.import_module('tests.'+modname)
+      functions = inspect.getmembers(m, inspect.isfunction)
+      for f in functions:
+        if f[0] == 'setup':
+          if f[1]():
+            exit()
+      for f in functions:
+        if f[0].startswith("test_"):
+          try:
+            f[1]()
+          except Exception as e:
+            print(e)
+      for f in functions:
+        if f[0] == 'teardown':
+          f[1]()
+  
+print("Benchmarks")
+    
+import argparse
+import sys
+import asyncio
+import os
+from asyncio.subprocess import PIPE, STDOUT
+import statistics
+
+import uvloop
+import psutil
+import atexit
+
+#from misc import cpu
+
+
+def run_wrk(loop, endpoint=None, lua=None, options=None):
+  rps = 0
+  try: 
+    endpoint = endpoint or 'http://localhost:8080/'
+    if lua:
+      if options != None:
+        wrk_fut = asyncio.create_subprocess_exec( 'wrk', '-t', '4', '-c', '32', '-d', '2', '-s', lua, *options, endpoint, stdout=PIPE, stderr=STDOUT)
+      else:
+        wrk_fut = asyncio.create_subprocess_exec( 'wrk', '-t', '4', '-c', '32', '-d', '2', '-s', lua, endpoint, stdout=PIPE, stderr=STDOUT)
+    else:
+      if options != None:
+        wrk_fut = asyncio.create_subprocess_exec( 'wrk', '-t', '4', '-c', '32', '-d', '2', *options, endpoint, stdout=PIPE, stderr=STDOUT)
+      else:
+        wrk_fut = asyncio.create_subprocess_exec( 'wrk', '-t', '4', '-c', '32', '-d', '2', endpoint, stdout=PIPE, stderr=STDOUT)
+  
+    wrk = loop.run_until_complete(wrk_fut)
+    rps = 0
+    lines = []
+    while 1:
+      line = loop.run_until_complete(wrk.stdout.readline())
+      if line:
+        line = line.decode('utf-8')
+        lines.append(line)
+        if line.startswith('Requests/sec:'):
+          rps = float(line.split()[-1])
+      else:
+        break
+  
+    retcode = loop.run_until_complete(wrk.wait())
+    if retcode != 0:
+      print('\r\n'.join(lines))
+  except Exception as e:
+    print(e)
+
+
+  return rps
+
+
+noisy = ['atom', 'chrome', 'firefox', 'dropbox', 'opera', 'spotify', 'gnome-documents']
+
+def silence():
+  for proc in psutil.process_iter():
+    if proc.name() in noisy:
+      proc.suspend()
+
+  def resume():
+    for proc in psutil.process_iter():
+      if proc.name() in noisy:
+        proc.resume()
+  atexit.register(resume)
+
+silence()
+
+loop = uvloop.new_event_loop()
+
+asyncio.set_event_loop(loop)
+
+server_fut = asyncio.create_subprocess_exec( 'python', 'tests/s_bench.py', stdout=asyncio.subprocess.PIPE )
+proc = loop.run_until_complete(server_fut)
+process = psutil.Process(proc.pid)
+
+# Can't get this to work when the server fails to start
+#retcode = loop.run_until_complete(proc.wait())
+#print(retcode)
+#exit()
+#print(dir(proc))
+#time.sleep(5)
+#print( proc.pid in psutil.pids())
+#print(proc.returncode)
+#if proc.returncode != None:
+  #print( "YAY")
+  #exit()
+try:
+
+  more_headers = ('-H','User-Agent: Mozilla/5.0 (X11; Linux x86_64) Gecko/20130501 Firefox/30.0 AppleWebKit/600.00 Chrome/30.0.0000.0 Trident/10.0 Safari/600.00',
+     '-H','Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+     '-H','Accept-Language: en-US,en;q=0.5',
+     '-H','Connection: keep-alive')
+  opts = ('-H','Cookie: mrsession=43709dd361cc443e976b05714581a7fb; foo=fdsfdasdfasdfdsfasdfsdfsdfasdfas; short=fazc;')
+  if 1:
+    print ("Hello pipelined", run_wrk(loop, 'http://localhost:8080/',lua='tests/lua/pipeline.lua'), "Requests/second" )
+    print ("More hdrs pipelined", run_wrk(loop, 'http://localhost:8080/',options=more_headers,lua='tests/lua/pipeline.lua'), "Requests/second" )
+    #print ("Hello          ", run_wrk(loop, 'http://localhost:8080/'),             "Requests/second" )
+    #print ("Hello hdrs     ", run_wrk(loop, 'http://localhost:8080/', options=more_headers), "Requests/second" )
+
+    #print ("Cookies        ", run_wrk(loop, 'http://localhost:8080/printCookies', options=opts), "Requests/second" )
+    #print ("many args      ", run_wrk(loop, 'http://localhost:8080/sixargs/one/two/three/four/five/six'), "Requests/second" )
+    #print ("404 natural    ", run_wrk(loop, 'http://localhost:8080/dfads404/'), "Requests/second" )
+    #print ("404            ", run_wrk(loop, 'http://localhost:8080/404/'), "Requests/second" )
+    #print ("Form parsing   ", run_wrk(loop, 'http://localhost:8080/form',lua='tests/lua/form.lua'), "Requests/second" )
+    #print ("Templates      ", run_wrk(loop, 'http://localhost:8080/template'),            "Requests/second" )
+    #print ("mrpacker       ", run_wrk(loop,'http://localhost:8080/mrpacker',lua='tests/lua/mrpacker.lua'), "Requests/second" )
+    #print ("Sessions       ", run_wrk(loop, 'http://localhost:8080/s',     options=opts), "Requests/second" )
+    # Disabled in s_bench.py print ("Sessions (py)  ", run_wrk(loop, 'http://localhost:8080/pys',   options=opts), "Requests/second" )
+    #print ("Session login  ", run_wrk(loop, 'http://localhost:8080/login'),               "Requests/second" )
+    #print ("json post      ", run_wrk(loop,'http://localhost:8080/json',lua='tests/lua/json.lua'), "Requests/second" )
+    #print ("mrpacker py    ", run_wrk(loop,'http://localhost:8080/mrpackerpy',lua='tests/lua/mrpacker.lua'), "Requests/second" )
+    #print ("msgpack py     ", run_wrk(loop,'http://localhost:8080/msgpack',lua='tests/lua/msgpack.lua'), "Requests/second" )
+
+  
+    opts = ('-H','XX-Real-IP: 1.2.3.4')
+    #print ("get ip         ", run_wrk(loop,'http://localhost:8080/getip',options=opts), "Requests/second" )
+    print ("many num args  ", run_wrk(loop, 'http://localhost:8080/sixargs/155/2001/29999/25/29999543/93243242394'), "Requests/second" )
+    #print ("404            ", run_wrk(loop, 'http://localhost:8080/404/'), "Requests/second" )
+
+  # Grab the stdout for debug 
+  if 0:
+    lines = []
+    x = 0
+    while 1:
+      x += 1
+      print(x)
+      #if x > 19842: break
+      if x > 21605: break
+      line = loop.run_until_complete(proc.stdout.readline())
+      if line:
+        line = line.decode('utf-8')
+        lines.append(line)
+      else:
+        break
+    print ( len(lines) )
+    print(lines)
+    #o = open( "wrkout", "wb" )
+    #o.write( (''.join(lines)).encode("utf-8") )
+    #o.close()
+
+except KeyboardInterrupt:
+  pass
+finally:
+  proc.terminate()
+  loop.run_until_complete(proc.wait())
+
diff --git a/examples/10_multiplefiles.py b/examples/10_multiplefiles.py
index aa34a68..6ee9ac0 100644
--- a/examples/10_multiplefiles.py
+++ b/examples/10_multiplefiles.py
@@ -9,6 +9,6 @@ def hello(request):
 app.run(cores=2)
 
 # curl -i --raw 'http://localhost:8080/'
-# /tst was added in multiplefiles.py
+#    /tst was added in multiplefiles.py
 # curl -i --raw 'http://localhost:8080/tst'
 
diff --git a/gbench/bld b/gbench/bld
index 8b94551..72ba4a5 100755
--- a/gbench/bld
+++ b/gbench/bld
@@ -1,6 +1,7 @@
-
-g++ t.cpp -g -O0 -msse4.2 -mavx2 -std=c++11 -lbenchmark -lpthread -o t
+#
+#g++ t.cpp -O3 -msse4.2 -mavx2 -std=c++11 -lbenchmark -lpthread -o t
 #g++ tst.cpp -O3 -msse4.2 -mavx2 -std=c++11 -lbenchmark -lpthread -o tst
 g++ parse.cpp -O3 -msse4.2 -mavx2 -std=c++11 -lbenchmark -lpthread -o parse
-#g++ string.cpp -O3 -msse4.2 -mavx2 -std=c++11 -lbenchmark -lpthread -o string
+#g++ string.cpp -O0 -msse4.2 -mavx2 -std=c++11 -lbenchmark -lpthread -o string
+#g++ query.cpp -O0 -msse4.2 -mavx2 -std=c++11 -lbenchmark -lpthread -o query
 
diff --git a/gbench/parse.cpp b/gbench/parse.cpp
index 46f776f..acbea6a 100644
--- a/gbench/parse.cpp
+++ b/gbench/parse.cpp
@@ -18,8 +18,19 @@
 #define unlikely(x) (x)
 #endif
 
+#ifdef _MSC_VER
+#define ALIGNED(n) _declspec(align(n))
+#else
+#define ALIGNED(n) __attribute__((aligned(n)))
+#endif
+
+
 #define IS_PRINTABLE_ASCII(c) ((unsigned char)(c)-040u < 0137u)
 
+#define CHAR4_TO_INT(a, b, c, d)         \
+   (unsigned int)((d << 24) | (c << 16) | (b << 8) | a)
+
+
 #define CHECK_END()                                                                                                                \
     if (buf == buf_end) {                                                                                                          \
         *ret = -2;                                                                                                                 \
@@ -204,6 +215,49 @@ FOUND_CTL:
 
     return buf;
 }
+static inline int getSession( const char *buf, size_t buflen ) {
+  const char *end = buf + buflen;
+  const char *last = buf;
+  const char *ses;
+  int len;
+
+  static char ALIGNED(16) ranges1[] = "==" ";;";
+  int found;
+  int state = 0;
+  do {
+    last = buf;
+    buf = findchar(buf, end, ranges1, sizeof(ranges1) - 1, &found);
+    if ( found ) {
+      if ( *buf == '=' ) {
+        //printf( " fnd >%.*s<\n", buf-last, last );  
+        if ( state == 0 ) {
+          // Save out the mrsession id 
+          if ( buf-last == 9 && ( *((unsigned int *)(last)) == CHAR4_TO_INT('m', 'r', 's','e') ) ) {
+            state = 1;
+          }
+          buf+=1;
+        }
+      }
+      else if ( *buf == ';' ) {
+        //printf( " fnd >%.*s<\n", buf-last, last );  
+        if (state == 1 ) {
+          ses = last;
+          len = buf-last;
+          return len;
+        }
+        state = 0;
+        buf+=1;
+        while ( *buf == 32 ) buf++;
+      }
+    }
+  } while( found );
+  if (state) {
+    ses = last;
+    len = buf-last;
+    return len;
+  }
+  return -1;
+}
 static const char *my_get_eol128(const char *buf) {
   //__m128i* pSrc1 = (__m128i *)string;         // init pointer to start of string
   __m128i m0 = _mm_set1_epi8(13);              // vector of 16 `\0` characters
@@ -223,10 +277,9 @@ static const char *my_get_eol128(const char *buf) {
 }
 
  //64bits  256bits  bytes 8 * 32 
-__m256i m13 = _mm256_set1_epi8(13);             
-__m256i m32 = _mm256_set1_epi8(32);             
-static const char *my_get_eol(const char *buf) {
+static const char *my_get_eol(const char *buf, const char *buf_end) {
 
+  __m256i m13 = _mm256_set1_epi8(13);             
   while (1)
   {
     __m256i v0 = _mm256_loadu_si256((const __m256i *)buf);
@@ -234,12 +287,15 @@ static const char *my_get_eol(const char *buf) {
     unsigned long vmask = _mm256_movemask_epi8(v1);  
     if (vmask != 0) {
         buf += TZCNT(vmask) + 2;
+        if ( buf > buf_end ) return NULL;
         break;                             
     }
     buf += 32; //pSrc1++;                 
+    if ( buf > buf_end ) return NULL;
   }
   return buf;
 }
+  __m256i m32 = _mm256_set1_epi8(32);             
 static const char *get_to_space(const char *buf, int *len) {
   const char *orig = buf;
   while (1)
@@ -257,6 +313,65 @@ static const char *get_to_space(const char *buf, int *len) {
   return buf;
 }
 
+__m256i m59 = _mm256_set1_epi8(59);
+__m256i m61 = _mm256_set1_epi8(61);
+static int getSession_avx2( const char* buf, const char* buf_end ) {
+  unsigned int msk;
+  int i=0,tz; // 32B index
+  int cnt = 0;
+  unsigned int shifted;
+  const char *sbuf = buf;
+  const char *obuf = buf;
+  int name_or_value = 0;
+  int found = 0;
+
+  do {
+    const char *block_start = obuf+32*i;
+    __m256i b0 = _mm256_loadu_si256((const __m256i *) block_start);
+    msk = _mm256_movemask_epi8( _mm256_or_si256(_mm256_cmpeq_epi8(b0, m59), _mm256_cmpeq_epi8(b0, m61) ) );
+    while (1) {
+
+      //if ( buf >= buf_end ) { goto sesdone; }
+      shifted = buf-block_start;
+      if ( shifted >= 32 ) break;
+      tz = TZCNT((msk >> shifted));
+      if ( tz < 32 ) {
+        buf += tz;
+        //printf( " fnd >%.*s<\n", buf-sbuf, sbuf );  
+        if ( buf >= buf_end ) { goto sesdone; }
+        if ( name_or_value == 1 ) {
+          if ( *buf == '=' ) { buf += 1; continue; } // = in value field
+          if ( found ) {
+            //printf( " done >%.*s<\n", buf-sbuf, sbuf );  
+            return buf-sbuf;
+          }
+          buf+=1;
+          name_or_value = 0;
+        } else {
+          if ( buf-sbuf == 9 && ( *((unsigned int *)(sbuf)) == CHAR4_TO_INT('m', 'r', 's','e') ) ) {
+            found = 1;
+          }
+          name_or_value = 1;
+        }
+        buf += 1;
+        sbuf = buf;
+      } else {
+        buf += 32 - shifted;
+        break;
+      }
+
+    }
+    i+=1;
+    if ( buf >= buf_end ) { goto sesdone; }
+  } while ( buf-obuf < buf_end-obuf );
+
+sesdone:
+  if ( found ) {
+    //printf( " sesdone >%.*s<\n", buf-sbuf, sbuf );  
+    return buf-sbuf;
+  }
+  return 0;
+}
 
 
 static const char *parse_headers(const char *buf, const char *buf_end, int *ret)
@@ -317,7 +432,7 @@ static const char *parse_headers(const char *buf, const char *buf_end, int *ret)
                 //mrr->flags = 2;
               //} 
               //buf = get_token_to_eol(buf, buf_end, ret); 
-              buf = my_get_eol(buf);
+              buf = my_get_eol(buf, buf_end);
               goto skipvalue;
             }
             if ( buf[13] == ':' ) { // Cache-Control:
@@ -338,7 +453,7 @@ static const char *parse_headers(const char *buf, const char *buf_end, int *ret)
               buf += 18;
               //mrr->ip = buf;
               //buf = get_token_to_eol(buf, buf_end, ret); 
-              buf = my_get_eol(buf);
+              buf = my_get_eol(buf, buf_end);
               //mrr->ip_len = headers[*num_headers].value_len;
               goto skipvalue;
             }
@@ -366,7 +481,7 @@ static const char *parse_headers(const char *buf, const char *buf_end, int *ret)
               buf += 11;
               //mrr->ip = buf;
               //buf = get_token_to_eol(buf, buf_end, ret); 
-              buf = my_get_eol(buf);
+              buf = my_get_eol(buf, buf_end);
               //mrr->ip_len = headers[*num_headers].value_len;
               goto skipvalue;
             }
@@ -534,7 +649,7 @@ static const char *parse_headers(const char *buf, const char *buf_end, int *ret)
         }
 hvalue:
         //if ((buf = get_token_to_eol(buf, buf_end, ret)) == NULL) {
-        if ((buf = my_get_eol(buf)) == NULL) {
+        if ((buf = my_get_eol(buf, buf_end)) == NULL) {
             return NULL;
         }
 skipvalue:
@@ -755,7 +870,6 @@ wedone:
 
 
 
-//__m256i m13 = _mm256_set1_epi8(13);
 __m256i m58 = _mm256_set1_epi8(58);   //  0x1313131313131313...
                                       //  0x32333435363713   //  abcdef\r
                                       //  32 bit number 0x40
@@ -765,6 +879,7 @@ static void parse_mine( const char* buf ) {
 
   //__m256i b0,b1,b2,b3,b4,b5,b6,b7,b8,b9,b10,b11,b12,b13,b14,b15;
   __m256i b0,b1,b2,b3,b4,b5,b6,b7;
+  __m256i m13 = _mm256_set1_epi8(13);
 
   b0 = _mm256_loadu_si256((const __m256i *) (buf + 32*0)); // buf[0]
   b1 = _mm256_loadu_si256((const __m256i *) (buf + 32*1)); // buf[32]
@@ -860,6 +975,7 @@ static void parse_mine3( const char* buf ) {
 
   //__m256i b0,b1,b2,b3,b4,b5,b6,b7,b8,b9,b10,b11,b12,b13,b14,b15;
   __m256i b0,b1,b2,b3,b4,b5,b6,b7;
+  __m256i m13 = _mm256_set1_epi8(13);
 
   const char *obuf = buf;
   const char *sbuf = buf;
@@ -951,26 +1067,33 @@ done:
   i += 1;
 }
 
-static void parse_mine2( const char* buf ) {
-  unsigned int msk;
-  int i=0,tz; // 32B index
-  int cnt = 0;
+static void parse_mine2( const char* buf, const char *buf_end ) {
+  unsigned long msk;
+  int i=0, tz; // 32B index
   unsigned int shifted;
   const char *sbuf = buf;
   const char *obuf = buf;
   int name_or_value = 0;
+  __m256i m13 = _mm256_set1_epi8(13);
 
   do {
-    const char *block_start = obuf+32*i;
+    const char *block_start = obuf+64*i;
     __m256i b0 = _mm256_loadu_si256((const __m256i *) block_start);
-    msk = _mm256_movemask_epi8( _mm256_or_si256(_mm256_cmpeq_epi8(b0, m13), _mm256_cmpeq_epi8(b0, m58) ) );
+    __m256i b1 = _mm256_loadu_si256((const __m256i *) (block_start+32));
+    msk = (unsigned int) _mm256_movemask_epi8( _mm256_or_si256(_mm256_cmpeq_epi8(b0, m13), _mm256_cmpeq_epi8(b0, m58) ) )  |
+        ((unsigned long) _mm256_movemask_epi8( _mm256_or_si256(_mm256_cmpeq_epi8(b1, m13), _mm256_cmpeq_epi8(b1, m58) ) ) << 32);
+
+    //const char *block_start = obuf+32*i;
+    //__m256i b0 = _mm256_loadu_si256((const __m256i *) block_start);
+    //msk = _mm256_movemask_epi8( _mm256_or_si256(_mm256_cmpeq_epi8(b0, m13), _mm256_cmpeq_epi8(b0, m58) ) );
     while (1) {
 
       shifted = buf-block_start;
-      if ( shifted >= 32 ) break;
+      if ( shifted >= 64 ) break;
       tz = TZCNT((msk >> shifted));
-      if ( tz < 32 ) {
+      if ( tz < 64 ) {
         buf += tz;
+
         if ( name_or_value == 1 ) {
           if ( *buf == ':' ) { buf += 1; continue; } // : in value field
           name_or_value = 0;
@@ -978,16 +1101,16 @@ static void parse_mine2( const char* buf ) {
           name_or_value = 1;
         }
         //printf( " fnd >%.*s<\n", buf-sbuf, sbuf );  
-        buf += 2; if ( *buf == '\r' ) break; // \r\n\r\n marks the end
+        buf += 2; if ( *buf == '\r' ) return; // \r\n\r\n marks the end
         sbuf = buf;
       } else {
-        buf += 32 - shifted;
+        buf += 64 - shifted;
         break;
       }
 
     }
-    i+=1;
-  } while ( *buf != '\r' );
+  } while ( buf < buf_end);
+  //} while ( *buf != '\r' );
 }
 
 static void parse_sse4( const char* buf ) {
@@ -999,25 +1122,24 @@ static void parse_sse4( const char* buf ) {
 static void parse_mysse4( const char* buf ) {
   int ret = 0;
   while ( ret == 0 && buf != NULL && buf[0] != '\r' ) {
-    buf = my_get_eol( buf );
+    buf = my_get_eol( buf, buf+512 );
   }
 }
 static char buf[8096] = "Host: server\r\n"
 "User-Agent: Mozilla/5.0 (X11; Linux x86_64) Gecko/20130501 Firefox/30.0 AppleWebKit/600.00 Chrome/30.0.0000.0 Trident/10.0 Safari/600.00\r\n"
 "Cookie: uid=12345678901234567890; __utma=1.1234567890.1234567890.1234567890.1234567890.12; wd=2560x1600\r\n"
-"Cookie: uid=12345678901234567890; __utma=1.1234567890.1234567890.1234567890.1234567890.12; wd=2560x1600\r\n"
-"Cookie: uid=12345678901234567890; __utma=1.1234567890.1234567890.1234567890.1234567890.12; wd=2560x1600\r\n"
-"Cookie: uid=12345678901234567890; __utma=1.1234567890.1234567890.1234567890.1234567890.12; wd=2560x1600\r\n"
-"Cookie: uid=12345678901234567890; __utma=1.1234567890.1234567890.1234567890.1234567890.12; wd=2560x1600\r\n"
-"Cookie: uid=12345678901234567890; __utma=1.1234567890.1234567890.1234567890.1234567890.12; wd=2560x1600\r\n"
-"Cookie: uid=12345678901234567890; __utma=1.1234567890.1234567890.1234567890.1234567890.12; wd=2560x1600\r\n"
-"Cookie: uid=12345678901234567890; __utma=1.1234567890.1234567890.1234567890.1234567890.12; wd=2560x1600\r\n"
 "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,* /*;q=0.8\r\n"
 "Accept-Language: en-US,en;q=0.5\r\n"
 "Connection: keep-alive\r\n\r\n";
 static char buf2[8096] = "Host: localhost:8080\r\nUser-Agent: python-requests/2.31.0\r\nAccept-Encoding: gzip, deflate\r\nAccept: * /*\r\nConnection: keep-alive\r\nCookie: foo=b=ar\r\nContent-Length: 0\r\n\r\n";
 static char path[8096] = "/foo/bar/bazfdasfffffffffffffffffffffffffffffffffffffffdfffffffffffffffffffffffffffffffffffffffffffffffffff ";
 
+//static  char cbuf[8096] = "uid=123456781234567890; mrsession=1234567890.1234567890.12; wd=2560x1600";
+//static  int  clen= strlen("uid=123456781234567890; mrsession=1234567890.1234567890.12; wd=2560x1600");
+static  char cbuf[8096] = "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa; mrsession=1234567890.1234567890.12; xxxxxxxxxxxxxxxxxxxxxxxxx=zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz";
+static  int  clen= strlen("zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa; mrsession=1234567890.1234567890.12; xxxxxxxxxxxxxxxxxxxxxxxxx=zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz");
+static  const char *cend = cbuf + clen;
+
 static void BM_SlowParse(benchmark::State& state) {
   // Perform setup here
   std::string text = "Host: server\n"
@@ -1054,7 +1176,7 @@ static void BM_my_header_parse(benchmark::State& state) {
 }
 static void BM_my2_header_parse(benchmark::State& state) {
   for (auto _ : state) {
-    parse_mine2(buf);
+    parse_mine2(buf,buf+2048);
   }
 }
 static void BM_my3_header_parse(benchmark::State& state) {
@@ -1092,20 +1214,37 @@ static void BM_adv_token_avx2(benchmark::State& state) {
     get_to_space(path, &path_len);
   }
 }
+// Benchmark: scalar session lookup over the cookie fixture `cbuf` (`clen` bytes).
+// The return value of getSession is not inspected here.
+static void BM_get_session(benchmark::State& state) {
+  for (auto _ : state) {
+    getSession(cbuf, clen);
+  }
+}
+// Benchmark: AVX2 session lookup over the cookie fixture, bounded by `cend`
+// (one past the last cookie byte). The return value is not inspected here.
+static void BM_get_session_avx2(benchmark::State& state) {
+  for (auto _ : state) {
+    getSession_avx2(cbuf, cend);
+  }
+}
+
+
+
 
 
 
 
 //BENCHMARK(BM_SlowParse);
-//BENCHMARK(BM_sse4_get_eol);
-//BENCHMARK(BM_my_get_eol);
-BENCHMARK(BM_my3_header_parse);
+BENCHMARK(BM_sse4_get_eol);
+BENCHMARK(BM_my_get_eol);
+//BENCHMARK(BM_my3_header_parse);
 //BENCHMARK(BM_my2_header_parse);
 //BENCHMARK(BM_my_header_parse);
-BENCHMARK(BM_old_header_parse);
+//BENCHMARK(BM_old_header_parse);
 //BENCHMARK(BM_avx2_header_parse);
-BENCHMARK(BM_adv_token);
-BENCHMARK(BM_adv_token_avx2);
+//BENCHMARK(BM_adv_token);
+//BENCHMARK(BM_adv_token_avx2);
+//BENCHMARK(BM_get_session);
+//BENCHMARK(BM_get_session_avx2);
 BENCHMARK_MAIN();
 
 /*
@@ -1125,12 +1264,17 @@ int main() {
   //strcpy(buf,"Host: localhost:8080\r\nUser-Agent: curl/7.68.0\r\nAccept: * /*\r\n\r\n");
   //strcpy(buf,"Host: localhost:8080\r\nUser-Agent: python-requests/2.31.0\r\nAccept-Encoding: gzip, deflate\r\nAccept: * /*\r\nConnection: keep-alive\r\nCookie: foo=b=ar\r\nContent-Length: 0\r\n\r\n");
 
+  char cbuf[8096] = "uid=123456781234567890; mrsession=1234567890.1234567890.12; wd=2560x1600";
+  int len = strlen("uid=123456781234567890; mrsession=1234567890.1234567890.12; wd=2560x1600");
+  const char *cend = cbuf + len;
+
+  getSession_avx2(cbuf,cend);
 
   int ret = 0;
-  parse_headers_avx2(buf,buf+512,&ret);
+  //parse_headers_avx2(buf,buf+512,&ret);
   //parse_headers(buf,buf+2048,&ret);
   //parse_mine3(buf);
-  printf(" ret=%d\n",ret);
+  //printf(" ret=%d\n",ret);
 
   //unsigned long long l = 0x80008020ull;
   //unsigned int s = 7;
@@ -1138,5 +1282,4 @@ int main() {
 
 }
 
-
 */
diff --git a/gbench/query.cpp b/gbench/query.cpp
new file mode 100644
index 0000000..8ece864
--- /dev/null
+++ b/gbench/query.cpp
@@ -0,0 +1,757 @@
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <string>
+#include <cstring>
+#include <x86intrin.h>
+#ifdef __AVX2__
+#include <immintrin.h>
+#endif
+
+#include <benchmark/benchmark.h>
+
+// Branch-prediction hints: map to __builtin_expect when __GNUC__ >= 3 and
+// degrade to the plain expression otherwise.
+#if __GNUC__ >= 3
+#define likely(x) __builtin_expect(!!(x), 1)
+#define unlikely(x) __builtin_expect(!!(x), 0)
+#else
+#define likely(x) (x)
+#define unlikely(x) (x)
+#endif
+
+// ALIGNED(n): request n-byte alignment for a declaration, spelled per compiler.
+#ifdef _MSC_VER
+#define ALIGNED(n) _declspec(align(n))
+#else
+#define ALIGNED(n) __attribute__((aligned(n)))
+#endif
+
+
+// True for bytes in the printable ASCII range 0x20..0x7e.
+#define IS_PRINTABLE_ASCII(c) ((unsigned char)(c)-040u < 0137u)
+
+// Value (0..15) of one hex digit. Works for '0'-'9', 'A'-'F' and 'a'-'f';
+// the result is meaningless for any other byte, so validate with is_hex first.
+// NOTE: both macros evaluate their argument more than once.
+#define hex_to_dec(x) \
+  (((x) <= '9' ? 0 : 9) + ((x) & 0x0f))
+// True when x is a hex digit. Lower-case digits are accepted because
+// percent-encoding treats "%c3" and "%C3" as equivalent.
+#define is_hex(x) (((x) >= '0' && (x) <= '9') || ((x) >= 'A' && (x) <= 'F') || ((x) >= 'a' && (x) <= 'f'))
+
+
+// Pack four byte values into one unsigned int with `a` in the lowest byte.
+#define CHAR4_TO_INT(a, b, c, d)         \
+   (unsigned int)(((d) << 24) | ((c) << 16) | ((b) << 8) | (a))
+
+
+// Parser helper macros. They expand inside a function that has `buf`,
+// `buf_end` and `int *ret` in scope and that returns a pointer.
+
+// Bail out with *ret = -2 when the cursor has reached the end of the input.
+#define CHECK_END()                                                                                                                \
+    if (buf == buf_end) {                                                                                                          \
+        *ret = -2;                                                                                                                 \
+        return NULL;                                                                                                               \
+    }
+
+
+// Same expansion as CHECK_END.
+#define CHECK_EOF()                                                                                                                \
+    if (buf == buf_end) {                                                                                                          \
+        *ret = -2;                                                                                                                 \
+        return NULL;                                                                                                               \
+    }
+
+// Consume one byte without a bounds check; fail with *ret = -1 if it is not `ch`.
+#define EXPECT_CHAR_NO_CHECK(ch)                                                                                                   \
+    if (*buf++ != ch) {                                                                                                            \
+        *ret = -1;                                                                                                                 \
+        return NULL;                                                                                                               \
+    }
+
+// Bounds-checked variant: CHECK_EOF followed by EXPECT_CHAR_NO_CHECK.
+#define EXPECT_CHAR(ch)                                                                                                            \
+    CHECK_EOF();                                                                                                                   \
+    EXPECT_CHAR_NO_CHECK(ch);
+
+
+// Lower-case conversion via a 256-entry lookup table.
+// TOLC(c) maps 'A'..'Z' (0x41..0x5a) to 'a'..'z' and returns every other byte
+// value unchanged. The argument is parenthesized before the cast so that
+// expressions such as TOLC(p[0] + 1) index the table with the whole value.
+#define TOLC(c) __lct[(unsigned char)(c)]
+static const unsigned char __lct[] __attribute__((aligned(64))) = {
+  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+  0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+  0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
+  0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
+  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
+  0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
+  0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+  0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
+  0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+  0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
+  0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+  0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
+  0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+  0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
+  0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+  0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+  0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+  0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
+  0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
+  0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
+  0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
+  0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
+  0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
+  0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
+  0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
+  0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
+  0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
+  0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
+  0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
+  0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff
+};
+
+
+// 256-entry classification table indexed by byte value: \1 marks an accepted
+// byte, \0 a rejected one. All control bytes and all bytes >= 0x80 are
+// rejected. NOTE(review): the accepted set looks like the HTTP "token"
+// characters - confirm against the parser that consumes it; nothing in this
+// file reads the table.
+static const char *token_char_map = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+                                    "\0\1\0\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0"
+                                    "\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1"
+                                    "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0"
+                                    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+                                    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+                                    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+                                    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
+
+// Count the trailing zero bits of `in` with the x86 `tzcnt` instruction.
+// The scanners in this file compare the result against the block width
+// (32 or 64) to detect "no candidate byte left in this block".
+static unsigned long TZCNT(unsigned long long in) {
+  unsigned long zero_bits;
+  asm("tzcnt %1, %0\n\t" : "=r"(zero_bits) : "r"(in));
+  return zero_bits;
+}
+
+
+// Scan [buf, buf_end) for the first byte that falls inside one of the
+// inclusive byte ranges in `ranges` (pairs of low/high bytes, `ranges_size`
+// bytes in total).
+// Returns a pointer to that byte and sets *found = 1, or returns buf_end with
+// *found = 0 when nothing matches.
+// NOTE: a full 16 bytes are loaded from `ranges` regardless of ranges_size.
+char *findchar(char *buf, char *buf_end, char *ranges, size_t ranges_size, int *found)
+{
+    *found = 0;
+    const __m128i wanted = _mm_loadu_si128((const __m128i *)ranges);
+
+    // Bulk phase: whole 16-byte chunks straight from the input.
+    if (likely(buf_end - buf >= 16)) {
+        for (size_t remaining = (buf_end - buf) & ~15; likely(remaining != 0); remaining -= 16, buf += 16) {
+            __m128i chunk = _mm_loadu_si128((const __m128i *)buf);
+            int hit = _mm_cmpestri(wanted, ranges_size, chunk, 16, _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS);
+            if (unlikely(hit != 16)) {
+                *found = 1;
+                return buf + hit;
+            }
+        }
+    }
+
+    // Tail phase: fewer than 16 bytes remain.
+    size_t tail = buf_end - buf;
+    if ( tail == 0 ) {
+      *found = 0;
+      return buf;
+    }
+
+    // The tail is staged in a 16-byte scratch buffer so the load never reads
+    // past buf_end; bytes beyond `tail` may be stale, hence the `hit < tail` test.
+    static char sbuf[16] = {0};
+    memcpy( sbuf, buf, tail );
+    __m128i chunk = _mm_loadu_si128((const __m128i *)sbuf);
+    size_t hit = _mm_cmpestri(wanted, ranges_size, chunk, 16, _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS);
+    if (unlikely(hit != 16) && hit < tail) {
+      *found = 1;
+      return buf + hit;
+    }
+
+    *found = 0;
+    return buf_end;
+}
+//          1         2         3         4         5         6         7
+// 1234567890123456789012345678901234567890123456789012345678901234567890
+// /spanish/objetos%20voladores%20no%20identificados?foo=bar
+// /spanish/objetos%20voladores/
+// Percent-decode `path` (length bytes) in place, stopping at the first '?'.
+//   path    - bytes to decode; rewritten in place
+//   length  - number of input bytes
+//   qs_len  - written only when a '?' (or an invalid escape at the first hit)
+//             is encountered; see the notes at the two assignments below
+// Returns the length of the decoded text preceding the query string.
+static inline size_t sse_decode(char* path, ssize_t length, int *qs_len) {
+  //DBG printf("sse_decode >%.*s<\n", (int)length, path);
+  if (length == 0) return length;
+  char *pat = path;
+  static char ranges1[] = "%%" "??";
+  char *end = path + length;
+  int found;
+
+  // We only findchar once - benchmark one or more % encodings with continuing to use findchar ( Spanish / Chinese )
+  // The do/while(0) runs exactly once; it exists so `break` can leave early.
+  do {
+    //printf("sse_decode >%.*s<\n", (int)length, path);
+    pat = findchar(pat, end, ranges1, sizeof(ranges1) - 1, &found);
+    if ( found ) {
+      // NOTE(review): pat+1 / pat+2 are read without checking them against `end`.
+      if(*pat == '%' && is_hex(*(pat + 1)) && is_hex(*(pat + 2))) {
+        // First escape: overwrite the '%' with the decoded byte; the two hex
+        // digits become a gap that the loop below closes.
+        *pat = (hex_to_dec(*(pat + 1)) << 4) + hex_to_dec(*(pat + 2));
+        pat+=3;
+        length -= 2;
+      } else {
+        // Reached for '?' and also for a '%' that is not followed by two hex
+        // digits; here qs_len counts from pat inclusive.
+        *qs_len = end-pat;
+        length -= end-pat;
+        break;
+      }
+    }
+  } while (0);
+
+  // NOTE(review): after a successful first decode pat already points past the
+  // escape, so this tests the byte following it (possibly *end) - confirm.
+  if( !found || *pat == '?') return length;
+  char *write = pat;
+  if ( found ) write -= 2;  // write cursor trails read by the two removed hex digits
+  char *read = pat;
+  // Scalar compaction of the remainder.
+  for (;read < end;) {
+    if (read[0] == '?') {
+      length -= end-read;
+      // NOTE(review): measured from pat (position after the first escape),
+      // not from read - looks inconsistent with the branch above; confirm.
+      *qs_len  = end-pat;
+      break;
+    }
+    if ( read[0] == '%' ) {
+      if( is_hex(read[1]) && is_hex(read[2]) ) {
+        *write = (hex_to_dec(read[1]) << 4) + hex_to_dec(read[2]);
+        write+=1;
+        read += 3;
+        length-=2;
+      } else {
+        // Invalid escape: keep the '%' and the byte after it verbatim.
+        if (read > write) {
+          write[0] = read[0];
+          write[1] = read[1];
+        }
+        read += 2;
+        write += 2;
+      }
+
+    } else {
+      if (read > write) {
+        write[0] = read[0];
+      }
+      read++;
+      write++;
+    }
+  }
+  //printf("sse_decode len %d path >%.*s<\n", (int)length, (int)length, path);
+  //printf(" qs %d\n",*qs_len);
+
+  return length;
+}
+
+__m256i m37 = _mm256_set1_epi8(37); // %
+__m256i m63 = _mm256_set1_epi8(63); // ?
+//          1         2         3         4         5         6         7
+// 1234567890123456789012345678901234567890123456789012345678901234567890
+// /print/%E4%B8%8D%E5%8F%AF%E5%86%8D%E7%94%9F%E8%B5%84%E6%BA%90/?test";
+// /spanish/objetos%20voladores%20no%20identificados?foo=bar
+// /spanish/objetos%20voladores/
+//
+// Percent-decode buf[0..len) in place, scanning 32 bytes per AVX2 step for
+// '%' and '?'.
+//   buf    - input, rewritten in place. Whole 32-byte blocks are loaded, so
+//            memory up to the end of the last block touched must be readable.
+//   len    - number of input bytes
+//   qs_len - written only when a '?' is found: number of bytes after the '?'
+// Returns the decoded length of the part before the '?' (or of everything
+// when there is no '?'). Escapes are not validated: any two bytes after '%'
+// are run through hex_to_dec. A '%' with fewer than two bytes left before
+// buf+len is kept as a literal.
+static inline int path_decode(char* buf, int len, int *qs_len) {
+  unsigned int msk;
+  int i=0,tz; // 32B index
+  unsigned int shifted;
+  char *sbuf = buf;     // start of the literal run not yet moved down
+  char *obuf = buf;
+  char *buf_end = buf+len;
+  char *wbuf = buf;     // write cursor; meaningful once found != 0
+  int found = 0;        // set after the first escape has been decoded
+
+  do {
+    const char *block_start = obuf+32*i;
+    __m256i b0 = _mm256_loadu_si256((const __m256i *) block_start);
+    msk = _mm256_movemask_epi8( _mm256_or_si256(_mm256_cmpeq_epi8(b0, m37), _mm256_cmpeq_epi8(b0, m63) ) );
+    while (1) {
+
+      shifted = buf-block_start;
+      if ( shifted >= 32 ) break;
+      tz = TZCNT((msk >> shifted));
+      if ( tz < 32 ) {
+        buf += tz;
+        if ( buf >= buf_end ) { goto decdone; }
+        if ( *buf == '?' ) {
+          len -= buf_end-buf;
+          *qs_len = buf_end-buf-1;
+          goto decdone;
+        }
+        // *buf == '%' from here on.
+        if ( buf_end - buf < 3 ) {
+          // Truncated escape: do not read past the input, keep it as a literal.
+          buf += 1;
+          continue;
+        }
+        if ( found ) {
+          // The regions overlap (wbuf < sbuf), so memmove is required.
+          memmove( wbuf, sbuf, buf-sbuf );
+          wbuf += buf-sbuf;
+          *wbuf = (hex_to_dec(buf[1]) << 4) + hex_to_dec(buf[2]);
+          wbuf++;
+        } else {
+          found = 1;
+          *buf = (hex_to_dec(buf[1]) << 4) + hex_to_dec(buf[2]);
+          wbuf = buf+1;
+        }
+        len -= 2;
+        buf += 3;
+        sbuf = buf;
+      } else {
+        buf += 32 - shifted;
+        break;
+      }
+
+    }
+    i+=1;
+  } while ( buf < buf_end );
+
+decdone:
+  if ( found ) {
+    // buf may have been advanced to a block boundary beyond the input; only
+    // bytes inside [sbuf, buf_end) belong to the caller's data.
+    if ( buf > buf_end ) buf = buf_end;
+    if ( buf > sbuf ) memmove( wbuf, sbuf, buf-sbuf );
+  }
+  return len;
+}
+// 64-byte-per-step variant of the in-place percent decoder: two 32-byte AVX2
+// loads are combined into one 64-bit mask of '%' / '?' positions.
+//   buf    - input, rewritten in place. Whole 64-byte blocks are loaded, so
+//            memory up to the end of the last block touched must be readable.
+//   len    - number of input bytes
+//   qs_len - written only when a '?' is found: number of bytes after the '?'
+// Returns the decoded length of the part before the '?' (or of everything
+// when there is no '?'). Escapes are not validated; a '%' with fewer than two
+// bytes left before buf+len is kept as a literal.
+static inline int path_decode2(char* buf, int len, int *qs_len) {
+  unsigned long long msk;
+  int i=0,tz; // 64B index
+  unsigned int shifted;
+  char *sbuf = buf;     // start of the literal run not yet moved down
+  char *obuf = buf;
+  char *buf_end = buf+len;
+  char *wbuf = buf;     // write cursor; meaningful once found != 0
+  int found = 0;        // set after the first escape has been decoded
+
+  do {
+    const char *block_start = obuf+64*i;
+    __m256i b0 = _mm256_loadu_si256((const __m256i *) block_start);
+    __m256i b1 = _mm256_loadu_si256((const __m256i *) (block_start+32));
+    // 64-bit type for the high half so the shift is defined where long is 32 bits.
+    msk = (unsigned int) _mm256_movemask_epi8( _mm256_or_si256(_mm256_cmpeq_epi8(b0, m37), _mm256_cmpeq_epi8(b0, m63) ) )  |
+        ((unsigned long long) _mm256_movemask_epi8( _mm256_or_si256(_mm256_cmpeq_epi8(b1, m37), _mm256_cmpeq_epi8(b1, m63) ) ) << 32);
+    while (1) {
+
+      shifted = buf-block_start;
+      if ( shifted >= 64 ) break;
+      tz = TZCNT((msk >> shifted));
+      if ( tz < 64 ) {
+        buf += tz;
+        if ( buf >= buf_end ) { goto decdone; }
+        if ( *buf == '?' ) {
+          len -= buf_end-buf;
+          *qs_len = buf_end-buf-1;
+          goto decdone;
+        }
+        // *buf == '%' from here on.
+        if ( buf_end - buf < 3 ) {
+          // Truncated escape: do not read past the input, keep it as a literal.
+          buf += 1;
+          continue;
+        }
+        if ( found ) {
+          // The regions overlap (wbuf < sbuf), so memmove is required.
+          memmove( wbuf, sbuf, buf-sbuf );
+          wbuf += buf-sbuf;
+          *wbuf = (hex_to_dec(buf[1]) << 4) + hex_to_dec(buf[2]);
+          wbuf++;
+        } else {
+          found = 1;
+          *buf = (hex_to_dec(buf[1]) << 4) + hex_to_dec(buf[2]);
+          wbuf = buf+1;
+        }
+        len -= 2;
+        buf += 3;
+        sbuf = buf;
+      } else {
+        buf += 64 - shifted;
+        break;
+      }
+
+    }
+    i+=1;
+  } while ( buf < buf_end );
+
+decdone:
+  if ( found ) {
+    // buf may have been advanced to a block boundary beyond the input; copying
+    // up to it would shift bytes that follow the caller's segment.
+    if ( buf > buf_end ) buf = buf_end;
+    if ( buf > sbuf ) memmove( wbuf, sbuf, buf-sbuf );
+  }
+  return len;
+}
+// Dispatcher: inputs longer than 32 bytes go to the 64-byte-step decoder,
+// everything else to the 32-byte-step one. Same contract as both.
+static inline int path_decode3(char* buf, int len, int *qs_len) {
+  return ( len > 32 ) ? path_decode2(buf,len,qs_len)
+                      : path_decode(buf,len,qs_len);
+}
+
+// Baseline query-string scanner kept for benchmark comparison.
+// Splits buf[0..buflen) on '=' and '&' with findchar() and percent-decodes
+// each key/value in place (sse_decode for delimited pieces, path_decode for
+// the final piece). The commented-out Py* calls mark where a dict would be
+// filled; here the decoded lengths are computed and dropped.
+static inline void parse_query_args_old( char *buf, size_t buflen ) {
+  if ( buflen == 0 ) return;
+
+  char *end = buf + buflen;
+  char *last = buf;   // start of the key/value currently being scanned
+  //PyObject* args = PyDict_New();
+  //PyObject* key = NULL; PyObject* value = NULL;
+
+  static char ALIGNED(16) ranges1[] = "==" "&&";
+  int found;
+  int state = 0;      // 0: next piece is a key, 1: next piece is a value
+  int ignore_me = 0;  // sink for the decoders' qs_len output
+  size_t len;
+  // foo=bar&key=23%28
+  do {
+    buf = findchar(buf, end, ranges1, sizeof(ranges1) - 1, &found);
+    if ( found ) {
+      if ( *buf == '=' ) {
+        len = sse_decode( last, buf-last, &ignore_me );
+        //key = PyUnicode_FromStringAndSize(last, len); //TODO error
+        //printf( " key >%.*s<\n", (int)(buf-last), last);
+        state = 1;
+        buf+=1;
+        last = buf;
+      }
+      else if ( *buf == '&' ) {
+        //if ( state == 0 ) key  = PyUnicode_FromString("");
+
+        len = sse_decode( last, buf-last, &ignore_me );
+        //value = PyUnicode_FromStringAndSize(last, len);
+        //printf( " val >%.*s<\n", (int)(buf-last), last);
+        state = 0;
+        //PyDict_SetItem(args, key, value);  //  == -1) goto loop_error;
+        //Py_XDECREF(key);
+        //Py_XDECREF(value);
+        buf+=1;
+        // Skip blanks after the separator without running past the input.
+        while ( buf < end && *buf == 32 ) buf++;
+        last = buf;
+      }
+      else {
+        printf(" ERR found not = or ; %.*s\n", 5, buf );
+      }
+    }
+  } while( found );
+
+  if ( buf == end ) {
+    //if ( state == 0 ) key  = PyUnicode_FromString("");
+    // Final piece; a single trailing blank is excluded from it.
+    if ( *(buf-1) == ' ' ) {
+      len = path_decode( last, buf-last-1, &ignore_me );
+      //value = PyUnicode_FromStringAndSize(last, len); //TODO error
+      //printf( " val >%.*s<\n", (int)(buf-last-1), last);
+    } else {
+      len = path_decode( last, buf-last, &ignore_me );
+      //value = PyUnicode_FromStringAndSize(last, len); //TODO error
+      //printf( " val >%.*s<\n", (int)(buf-last), last);
+    }
+    state = 0;
+    //PyDict_SetItem(args, key, value);  //  == -1) goto loop_error;
+    //Py_XDECREF(key);
+    //Py_XDECREF(value);
+  }
+
+  return;
+}
+// Query-string scanner, variant 1: percent-decode the whole buffer once with
+// path_decode2(), then split the decoded text on '=' and '&' with findchar().
+// The commented-out Py* calls mark where a dict would be filled; nothing is
+// stored here.
+// NOTE(review): decoding before splitting means an escaped "%26"/"%3D" turns
+// into a real separator, and path_decode2 stops at a '?' - confirm this is
+// acceptable for the inputs being measured.
+static inline void parse_query_args( char *buf, size_t buflen ) {
+  if ( buflen == 0 ) return;
+
+  int ignore_me = 0;  // sink for path_decode2's qs_len output
+  char *last = buf;   // start of the key/value currently being scanned
+  //PyObject* args = PyDict_New();
+
+  size_t len = path_decode2( buf, buflen, &ignore_me );
+  //printf( " decoded >%.*s<\n", (int)(len), buf);
+  char *end = buf + len;
+
+  //PyObject* key = NULL; PyObject* value = NULL;
+
+  static char ALIGNED(16) ranges1[] = "==" "&&";
+  int found;
+  int state = 0;      // 0: next piece is a key, 1: next piece is a value
+  // foo=bar&key=23%28
+  do {
+    buf = findchar(buf, end, ranges1, sizeof(ranges1) - 1, &found);
+    if ( found ) {
+      if ( *buf == '=' ) {
+        //key = PyUnicode_FromStringAndSize(last, len); //TODO error
+        //printf( " key >%.*s<\n", (int)(buf-last), last);
+        state = 1;
+        buf+=1;
+        last = buf;
+      }
+      else if ( *buf == '&' ) {
+        //if ( state == 0 ) key  = PyUnicode_FromString("");
+        //value = PyUnicode_FromStringAndSize(last, len);
+        //printf( " val >%.*s<\n", (int)(buf-last), last);
+        state = 0;
+        //PyDict_SetItem(args, key, value);  //  == -1) goto loop_error;
+        //Py_XDECREF(key);
+        //Py_XDECREF(value);
+        buf+=1;
+        // Skip blanks after the separator without running past the input.
+        while ( buf < end && *buf == 32 ) buf++;
+        last = buf;
+      }
+      else {
+        printf(" ERR found not = or ; %.*s\n", 5, buf );
+      }
+    }
+  } while( found );
+
+  if ( buf == end ) {
+    // Final piece is [last, end), minus one trailing blank if present. Its
+    // handling is disabled, so no byte before the buffer is inspected when
+    // the decoded length is 0.
+    //if ( state == 0 ) key  = PyUnicode_FromString("");
+    //value = PyUnicode_FromStringAndSize(last, len); //TODO error
+    //printf( " val >%.*s<\n", (int)(buf-last), last);
+    state = 0;
+    //PyDict_SetItem(args, key, value);  //  == -1) goto loop_error;
+    //Py_XDECREF(key);
+    //Py_XDECREF(value);
+  }
+
+  return;
+}
+__m256i m38 = _mm256_set1_epi8(38); // &
+__m256i m61 = _mm256_set1_epi8(61); // =
+// Query-string scanner, variant 2: locate '=' and '&' 64 bytes at a time with
+// AVX2 and percent-decode every key/value segment in place via path_decode2().
+//   buf - query string, rewritten in place. Whole 64-byte blocks are loaded,
+//         so memory up to the end of the last block touched must be readable.
+//   len - number of input bytes
+// Decoded lengths are dropped; this is a timing harness only.
+// NOTE(review): relies on path_decode2 not writing past the segment it is
+// given, and path_decode2 treats '?' as a terminator - confirm for real input.
+static inline void parse_query_args2( char *buf, size_t len ) {
+  unsigned long long msk;
+  int i=0,tz; // 64B index
+  unsigned int shifted;
+  char *sbuf = buf;   // start of the segment currently being scanned
+  char *obuf = buf;
+  int state = 0;      // 0: next segment is a key, 1: next segment is a value
+  int ignore_me = 0;  // sink for path_decode2's qs_len output
+
+  if ( len == 0 ) return;
+  char *buf_end = buf+len;
+
+  do {
+    const char *block_start = obuf+64*i;
+    __m256i b0 = _mm256_loadu_si256((const __m256i *) block_start);
+    __m256i b1 = _mm256_loadu_si256((const __m256i *) (block_start+32));
+    // 64-bit type for the high half so the shift is defined where long is 32 bits.
+    msk = (unsigned int) _mm256_movemask_epi8( _mm256_or_si256(_mm256_cmpeq_epi8(b0, m38), _mm256_cmpeq_epi8(b0, m61) ) )  |
+        ((unsigned long long) _mm256_movemask_epi8( _mm256_or_si256(_mm256_cmpeq_epi8(b1, m38), _mm256_cmpeq_epi8(b1, m61) ) ) << 32);
+    while (1) {
+
+      shifted = buf-block_start;
+      if ( shifted >= 64 ) break;
+      tz = TZCNT((msk >> shifted));
+      if ( tz < 64 ) {
+        buf += tz;
+        if ( buf >= buf_end ) { goto pdone; }
+        // Exactly one delimiter is handled per hit, so an empty value
+        // ("a=&b") yields an empty segment instead of re-decoding "a=".
+        if ( *buf == '=' ) {
+          //printf( " key >%.*s<\n", (int)(buf-sbuf), sbuf );
+          path_decode2( sbuf, buf-sbuf, &ignore_me );
+          state = 1;
+        } else {
+          // '&'
+          //printf( " val >%.*s<\n", (int)(buf-sbuf), sbuf);
+          path_decode2( sbuf, buf-sbuf, &ignore_me );
+          state = 0;
+        }
+        buf += 1;
+        sbuf = buf;
+      } else {
+        buf += 64 - shifted;
+        break;
+      }
+
+    }
+    i+=1;
+  } while ( buf < buf_end );
+
+pdone:
+  // buf may sit on a block boundary or a delimiter beyond the input; the last
+  // segment ends at buf_end.
+  if ( buf > buf_end ) buf = buf_end;
+  path_decode2( sbuf, buf-sbuf, &ignore_me );
+  //printf( " done >%.*s<\n", (int)(buf-sbuf), sbuf );
+  return;
+}
+
+
+
+
+// Benchmark inputs. Each buffer is 8096 bytes so the tail after the string is
+// zero-filled, and each length is taken with strlen on a copy of the same literal.
+
+// Short path with nothing to decode.
+static char buf[8096] = "/foo/bar/bazfdas";
+static int  blen = strlen("/foo/bar/bazfdas");
+// Long path followed by a query string, no escapes.
+static char buf2[8096] = "/foo/bar/bazfdasfffffffffffffffffffffffffffffffffffffffdffffffffffffffffffffffffffffffffffffffffffffffffff1?foo=bar";
+static int  blen2 = strlen("/foo/bar/bazfdasfffffffffffffffffffffffffffffffffffffffdffffffffffffffffffffffffffffffffffffffffffffffffff1?foo=bar");
+//static char buf[8096]   = "/spanish/objetos%20voladoresentificados";
+//static int  blen = strlen("/spanish/objetos%20voladoresentificados");
+// Path with three %20 escapes followed by a query string.
+static char buf3[8096] = "/spanish/objetos%20voladores%20no%20identificados?foo=bar";
+static int  blen3 = strlen("/spanish/objetos%20voladores%20no%20identificados?foo=bar");
+
+// Path made almost entirely of escapes, followed by a query string.
+static char buf4[8096] = "/print/%E4%B8%8D%E5%8F%AF%E5%86%8D%E7%94%9F%E8%B5%84%E6%BA%90/?test";
+static int  blen4 = strlen("/print/%E4%B8%8D%E5%8F%AF%E5%86%8D%E7%94%9F%E8%B5%84%E6%BA%90/?test");
+
+// Query strings: plain, with escapes, and with escapes plus long keys/values.
+static char qbuf[8096] = "p1=v1&param2=value2";
+static int  qlen = strlen("p1=v1&param2=value2");
+static char qbuf2[8096] = "key%201=%C2%BFPeroc%C3%B3mopuedesdecir%20esto%3F&param2=val2";
+static int  qlen2 = strlen("key%201=%C2%BFPeroc%C3%B3mopuedesdecir%20esto%3F&param2=val2");
+static char qbuf3[8096] = "key%201=%C2%BFPeroc%C3%B3mopuedesdecir%20esto%3F&param2222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222220=val2&ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff=aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1";
+static int  qlen3 = strlen("key%201=%C2%BFPeroc%C3%B3mopuedesdecir%20esto%3F&param2222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222220=val2&ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff=aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1");
+
+
+// Short-path family: each benchmark runs one decoder over `buf` / `blen`.
+
+// SSE4.2 findchar-based decoder.
+static void BM_sse_decode(benchmark::State& state) {
+  int query_bytes = 0;
+  for (auto _ : state)
+    sse_decode(buf, blen, &query_bytes);
+}
+// AVX2 decoder, 32 bytes per step.
+static void BM_path_decode(benchmark::State& state) {
+  int query_bytes = 0;
+  for (auto _ : state)
+    path_decode(buf, blen, &query_bytes);
+}
+// AVX2 decoder, 64 bytes per step.
+static void BM_path_decode2(benchmark::State& state) {
+  int query_bytes = 0;
+  for (auto _ : state)
+    path_decode2(buf, blen, &query_bytes);
+}
+// Length-based dispatcher over the two AVX2 decoders.
+static void BM_path_decode3(benchmark::State& state) {
+  int query_bytes = 0;
+  for (auto _ : state)
+    path_decode3(buf, blen, &query_bytes);
+}
+// Long-path family: each benchmark runs one decoder over `buf2` / `blen2`.
+
+// SSE4.2 findchar-based decoder.
+static void BM_sse_decode_long(benchmark::State& state) {
+  int query_bytes = 0;
+  for (auto _ : state)
+    sse_decode(buf2, blen2, &query_bytes);
+}
+// AVX2 decoder, 32 bytes per step.
+static void BM_path_decode_long(benchmark::State& state) {
+  int query_bytes = 0;
+  for (auto _ : state)
+    path_decode(buf2, blen2, &query_bytes);
+}
+// AVX2 decoder, 64 bytes per step.
+static void BM_path_decode2_long(benchmark::State& state) {
+  int query_bytes = 0;
+  for (auto _ : state)
+    path_decode2(buf2, blen2, &query_bytes);
+}
+// Length-based dispatcher over the two AVX2 decoders.
+static void BM_path_decode3_long(benchmark::State& state) {
+  int query_bytes = 0;
+  for (auto _ : state)
+    path_decode3(buf2, blen2, &query_bytes);
+}
+// Escaped-path family (`buf3` / `blen3`).
+// The decoders rewrite their input in place, so each iteration first restores
+// a pristine copy into a zero-initialized scratch buffer; otherwise every
+// iteration after the first would measure already-decoded text. The small
+// memcpy is paid equally by all four variants. The decoded length is handed
+// to DoNotOptimize so the call cannot be discarded.
+
+// SSE4.2 findchar-based decoder.
+static void BM_sse_decode_complex(benchmark::State& state) {
+  static char work[sizeof(buf3)];
+  int qslen = 0;
+  for (auto _ : state) {
+    memcpy(work, buf3, blen3 + 1);
+    size_t n = sse_decode(work, blen3, &qslen);
+    benchmark::DoNotOptimize(n);
+  }
+}
+// AVX2 decoder, 32 bytes per step.
+static void BM_path_decode_complex(benchmark::State& state) {
+  static char work[sizeof(buf3)];
+  int qslen = 0;
+  for (auto _ : state) {
+    memcpy(work, buf3, blen3 + 1);
+    int n = path_decode(work, blen3, &qslen);
+    benchmark::DoNotOptimize(n);
+  }
+}
+// AVX2 decoder, 64 bytes per step.
+static void BM_path_decode2_complex(benchmark::State& state) {
+  static char work[sizeof(buf3)];
+  int qslen = 0;
+  for (auto _ : state) {
+    memcpy(work, buf3, blen3 + 1);
+    int n = path_decode2(work, blen3, &qslen);
+    benchmark::DoNotOptimize(n);
+  }
+}
+// Length-based dispatcher over the two AVX2 decoders.
+static void BM_path_decode3_complex(benchmark::State& state) {
+  static char work[sizeof(buf3)];
+  int qslen = 0;
+  for (auto _ : state) {
+    memcpy(work, buf3, blen3 + 1);
+    int n = path_decode3(work, blen3, &qslen);
+    benchmark::DoNotOptimize(n);
+  }
+}
+// Escape-heavy family (`buf4` / `blen4`).
+// The decoders rewrite their input in place, so each iteration first restores
+// a pristine copy into a zero-initialized scratch buffer; otherwise every
+// iteration after the first would measure already-decoded text. The decoded
+// length is handed to DoNotOptimize so the call cannot be discarded.
+
+// SSE4.2 findchar-based decoder.
+static void BM_sse_decode_chinese(benchmark::State& state) {
+  static char work[sizeof(buf4)];
+  int qslen = 0;
+  for (auto _ : state) {
+    memcpy(work, buf4, blen4 + 1);
+    size_t n = sse_decode(work, blen4, &qslen);
+    benchmark::DoNotOptimize(n);
+  }
+}
+// AVX2 decoder, 32 bytes per step.
+static void BM_path_decode_chinese(benchmark::State& state) {
+  static char work[sizeof(buf4)];
+  int qslen = 0;
+  for (auto _ : state) {
+    memcpy(work, buf4, blen4 + 1);
+    int n = path_decode(work, blen4, &qslen);
+    benchmark::DoNotOptimize(n);
+  }
+}
+// AVX2 decoder, 64 bytes per step.
+static void BM_path_decode2_chinese(benchmark::State& state) {
+  static char work[sizeof(buf4)];
+  int qslen = 0;
+  for (auto _ : state) {
+    memcpy(work, buf4, blen4 + 1);
+    int n = path_decode2(work, blen4, &qslen);
+    benchmark::DoNotOptimize(n);
+  }
+}
+// Plain query-string family: each benchmark runs one scanner over `qbuf` / `qlen`.
+
+// findchar-based baseline.
+static void BM_query_args_old(benchmark::State& state) {
+  for (auto _ : state) {
+    parse_query_args_old(qbuf, qlen);
+  }
+}
+// Decode everything first, then split with findchar.
+static void BM_query_args(benchmark::State& state) {
+  for (auto _ : state) {
+    parse_query_args(qbuf, qlen);
+  }
+}
+// AVX2 split with per-segment decode.
+static void BM_query_args2(benchmark::State& state) {
+  for (auto _ : state) {
+    parse_query_args2(qbuf, qlen);
+  }
+}
+// Escaped query-string family (`qbuf3` / `qlen3`).
+// All three scanners decode in place, so each iteration first restores a
+// pristine copy into a zero-initialized scratch buffer; otherwise only the
+// very first iteration of the first benchmark would see escaped input.
+// ClobberMemory keeps the writes to the scratch buffer observable.
+
+// findchar-based baseline.
+static void BM_query_args_decodes_old(benchmark::State& state) {
+  static char work[sizeof(qbuf3)];
+  for (auto _ : state) {
+    memcpy(work, qbuf3, qlen3 + 1);
+    parse_query_args_old(work, qlen3);
+    benchmark::ClobberMemory();
+  }
+}
+// Decode everything first, then split with findchar.
+static void BM_query_args_decodes(benchmark::State& state) {
+  static char work[sizeof(qbuf3)];
+  for (auto _ : state) {
+    memcpy(work, qbuf3, qlen3 + 1);
+    parse_query_args(work, qlen3);
+    benchmark::ClobberMemory();
+  }
+}
+// AVX2 split with per-segment decode.
+static void BM_query_args_decodes2(benchmark::State& state) {
+  static char work[sizeof(qbuf3)];
+  for (auto _ : state) {
+    memcpy(work, qbuf3, qlen3 + 1);
+    parse_query_args2(work, qlen3);
+    benchmark::ClobberMemory();
+  }
+}
+
+// Active registrations: the three query-string scanners, on the plain input
+// and on the escaped input.
+BENCHMARK(BM_query_args_old);
+BENCHMARK(BM_query_args);
+BENCHMARK(BM_query_args2);
+BENCHMARK(BM_query_args_decodes_old);
+BENCHMARK(BM_query_args_decodes);
+BENCHMARK(BM_query_args_decodes2);
+// The path-decoder benchmarks are defined above but currently not registered.
+/*
+BENCHMARK(BM_sse_decode);
+BENCHMARK(BM_path_decode);
+BENCHMARK(BM_path_decode2);
+BENCHMARK(BM_path_decode3);
+BENCHMARK(BM_sse_decode_long);
+BENCHMARK(BM_path_decode_long);
+BENCHMARK(BM_path_decode2_long);
+BENCHMARK(BM_path_decode3_long);
+BENCHMARK(BM_sse_decode_complex);
+BENCHMARK(BM_path_decode_complex);
+BENCHMARK(BM_path_decode2_complex);
+BENCHMARK(BM_path_decode3_complex);
+BENCHMARK(BM_sse_decode_chinese);
+BENCHMARK(BM_path_decode_chinese);
+BENCHMARK(BM_path_decode2_chinese);
+*/
+// Supplies main() for the benchmark binary.
+BENCHMARK_MAIN();
+/*
+int main() {
+
+  int qslen = 0;
+  //printf("%.*s\n",blen4,buf4);
+  //path_decode2(buf4, blen4, &qslen);
+  //sse_decode(buf, blen, &qslen);
+
+  parse_query_args2(qbuf3, qlen3);
+
+}
+
+
+*/
diff --git a/gbench/string.cpp b/gbench/string.cpp
index a4eb0e0..cd65715 100644
--- a/gbench/string.cpp
+++ b/gbench/string.cpp
@@ -3,6 +3,7 @@
 #include <stdlib.h>
 #include <stdint.h>
 #include <string>
+#include <string.h>
 #include <x86intrin.h>
 #ifdef __AVX2__
 #include <immintrin.h>
@@ -27,10 +28,14 @@ static long _strtol( char* buf ) {
   char * endptr = buf+4;
   return strtol(buf, &endptr, 10);
 }
-static long my_strtol( char* s ) {
+static long my_strtol( char* s, int maxlen ) {  // Accumulate the leading decimal digits of s into a long, with a digit-count cap driven by maxlen.
   long l;
+  int n = 0;  // digits consumed so far; NOTE(review): the accumulator l above is declared without an initializer, so the returned value starts from an indeterminate state
+  benchmark::DoNotOptimize(n);
   while (_isdigit(*s)) {
     l = (l * 10) + (*s++ - '0');
+    n+=1;
+    if ( n > maxlen ) return l;  // NOTE(review): n is tested after the digit was consumed, so this fires after maxlen+1 digits, not maxlen
   }
   return l;
 }
@@ -43,11 +48,16 @@ static long my_strtol2( char* s ) {
 }
 static long my_strtol3( char* s ) {
   long l;
-  while (IS_DIGIT2(*s)) {
+  while (IS_DIGIT(*s)) {  // this change swaps the digit test from IS_DIGIT2 to IS_DIGIT (both macros are defined outside this hunk)
     l = (l * 10) + (*s++ - '0');
   }
   return l;
 }
+static long my_strcmp( char* s ) {  // Benchmark stand-in for a header-name compare: returns 0 when s[0] is 'C', otherwise 1.
+  // Earlier two-byte variant, kept for reference: if ( s[0] == 'C' && s[11] == 'e' ) return 0;
+  if ( s[0] == 'C' ) return 0;  // only the first byte is inspected
+  return 1;
+}
 
 
 
@@ -57,7 +67,7 @@ static void BM_strtol(benchmark::State& state) {
 }
 static void BM_my_strtol(benchmark::State& state) {
   char buf[8096] = "123z4 ";
-  for (auto _ : state) { long x = my_strtol(buf); }
+  for (auto _ : state) { long x = my_strtol(buf,4); }  // updated for the new maxlen parameter (4); x is not used afterwards
 }
 static void BM_my_strtol2(benchmark::State& state) {
   char buf[8096] = "123z4 ";
@@ -67,10 +77,37 @@ static void BM_my_strtol3(benchmark::State& state) {
   char buf[8096] = "123z4 ";
   for (auto _ : state) { long x = my_strtol3(buf); }
 }
+static void BM_strcmp(benchmark::State& state) {  // Benchmark: libc strcmp of "Content-Type" against "Cntent-Type" (the strings differ at index 1).
+  char buf[8096] = "Content-Type";
+  for (auto _ : state) { 
+    long x;
+    benchmark::DoNotOptimize(x);  // NOTE(review): applied before x is assigned, so it may not keep the strcmp result below alive - confirm the call is not optimized out
+    x = strcmp(buf, "Cntent-Type"); 
+  }
+}
+static void BM_my_strcmp(benchmark::State& state) {  // Benchmark: the first-byte check my_strcmp() on "Content-Type".
+  char buf[8096] = "Content-Type";
+  for (auto _ : state) { 
+    long x;
+    benchmark::DoNotOptimize(x);  // NOTE(review): applied before x is assigned, so it may not keep the my_strcmp result below alive
+    x = my_strcmp(buf);
+  }
+}
 
 BENCHMARK(BM_strtol);
 BENCHMARK(BM_my_strtol);
 BENCHMARK(BM_my_strtol2);
 BENCHMARK(BM_my_strtol3);
+BENCHMARK(BM_strcmp);  // new: libc strcmp case
+BENCHMARK(BM_my_strcmp);  // new: first-byte check case
 BENCHMARK_MAIN();
 
+/*
+int main() {
+  char buf[8096] = "123z4 ";
+  //strcpy(buf,"942312");
+
+  printf(" my strtol %d\n", my_strtol(buf, 4));
+
+}
+*/
diff --git a/gbench/t.cpp b/gbench/t.cpp
index b587e64..c335b1d 100644
--- a/gbench/t.cpp
+++ b/gbench/t.cpp
@@ -18,8 +18,26 @@
 #define unlikely(x) (x)
 #endif
 
+#ifdef _MSC_VER  // ALIGNED(n): request n-byte alignment using the compiler-specific spelling
+#define ALIGNED(n) _declspec(align(n))
+#else
+#define ALIGNED(n) __attribute__((aligned(n)))
+#endif
+
+
 #define IS_PRINTABLE_ASCII(c) ((unsigned char)(c)-040u < 0137u)
 
+#define CHAR4_TO_INT(a, b, c, d)         \
+   (unsigned int)((d << 24) | (c << 16) | (b << 8) | a)  // packs four bytes with a in the lowest byte; NOTE(review): arguments are not parenthesized, so pass simple expressions only
+
+// CHECK_END(): for use inside a function with buf, buf_end and ret in scope; once buf reaches buf_end it stores -2 in *ret and returns NULL.
+#define CHECK_END()                                                                                                                \
+    if (buf == buf_end) {                                                                                                          \
+        *ret = -2;                                                                                                                 \
+        return NULL;                                                                                                               \
+    }
+
+
 #define CHECK_EOF()                                                                                                                \
     if (buf == buf_end) {                                                                                                          \
         *ret = -2;                                                                                                                 \
@@ -36,6 +54,7 @@
     CHECK_EOF();                                                                                                                   \
     EXPECT_CHAR_NO_CHECK(ch);
 
+
 // Table for converting to lower case
 #define TOLC(c) __lct[(unsigned char)c]
 static const unsigned char __lct[] __attribute__((aligned(64))) = {
@@ -133,6 +152,33 @@ static const char *findchar(const char *buf, const char *buf_end, const char *ra
     }
     return buf;
 }
+static const char *adv_token(const char *buf, int *ret) {  // Advance buf to the next ' ' within a fixed 512-byte window; returns a pointer to that space, or NULL with *ret set to -1 (bad byte) or -2 (window exhausted).
+        const char *tok_start = buf;                                                                                               // only used by the unreachable tail below
+        const char *buf_end = buf+512;  // fixed scan window; also read by CHECK_END()
+        static const char ranges2[] = "\000\042\177\177";                                                              // two byte ranges handed to findchar: 0x00-0x22 and 0x7f-0x7f
+        int found2;                                                                                                               
+        buf = findchar(buf, buf+512, ranges2, sizeof(ranges2) - 1, &found2);                                                       // fast scan to the first byte inside ranges2
+        if (!found2) {                                                                                                             
+            CHECK_END();                                                                                                           // nothing matched: give up with -2 if the whole window was consumed
+        } else if ( unlikely(*buf != ' ' )) {                                                                                      
+            *ret = -1;                                                                                                             // the matched byte is not a space: reject
+            return NULL;                                                                                                           
+        }                                                                                                                          
+        while (1) {                                                                                                                // byte-at-a-time scan; also returns immediately when findchar already stopped on a space
+            if (*buf == ' ') {                                                                                                    
+                return buf;                                                                                                             // NOTE(review): success path does not write *ret
+            } else if (unlikely(!IS_PRINTABLE_ASCII(*buf))) {                                                                      
+                if ((unsigned char)*buf < '\040' || *buf == '\177') {                                                          // control byte or DEL
+                    *ret = -1;                                                                                                     
+                    return NULL;
+                }                                                                                                             
+            }                                                                                                                
+            ++buf;                                                                                                         
+            CHECK_END();                                                                                                  
+        }                                                                                                                
+        *ret = buf - tok_start;                                                                                      // NOTE(review): unreachable - the loop above has no break, only returns
+        return tok_start;
+}
 
 static const char *get_token_to_eol(const char *buf, const char *buf_end, int *ret)
 {
@@ -169,6 +215,162 @@ FOUND_CTL:
 
     return buf;
 }
+static inline int getSession( const char *buf, size_t buflen ) {  // Scan "name=value; ..." text for a 9-byte name beginning with "mrse" and return the length of the piece that follows its '='; -1 if no such name is seen.
+  const char *end = buf + buflen;
+  const char *last = buf;  // start of the piece scanned in the current loop pass
+  const char *ses;  // NOTE(review): assigned below but never read; only the length is returned
+  int len;
+
+  static char ALIGNED(16) ranges1[] = "==" ";;";  // two single-byte ranges for findchar: '=' and ';'
+  int found;
+  int state = 0;  // 1 once the matching name has been seen, 0 otherwise
+  do {
+    last = buf;
+    buf = findchar(buf, end, ranges1, sizeof(ranges1) - 1, &found);
+    if ( found ) {
+      if ( *buf == '=' ) {
+        //printf( " fnd >%.*s<\n", buf-last, last );  
+        if ( state == 0 ) {
+          // Name test: 9 bytes long and first 4 bytes equal to "mrse" (presumably "mrsession"; bytes 4..8 are not compared)
+          if ( buf-last == 9 && ( *((unsigned int *)(last)) == CHAR4_TO_INT('m', 'r', 's','e') ) ) {
+            state = 1;
+          }
+          buf+=1;  // step over '='. NOTE(review): an '=' met while state == 1 is not stepped over, so the next findchar call starts on the same byte - confirm this cannot spin
+        }
+      }
+      else if ( *buf == ';' ) {
+        //printf( " fnd >%.*s<\n", buf-last, last );  
+        if (state == 1 ) {
+          ses = last;
+          len = buf-last;  // bytes between the '=' and this ';'
+          return len;
+        }
+        state = 0;
+        buf+=1;
+        while ( *buf == 32 ) buf++;  // skip spaces after ';'
+      }
+    }
+  } while( found );
+  if (state) {  // matching name was the final pair: no terminating ';'
+    ses = last;
+    len = buf-last;  // NOTE(review): buf is wherever the last findchar call stopped, which may be short of end - confirm
+    return len;
+  }
+  return -1;
+}
+static const char *my_get_eol128(const char *buf) {  // Scan forward 16 bytes at a time for byte 13 ('\r') and return the position two bytes past it.
+  // NOTE(review): there is no end pointer; the loop keeps loading 16-byte blocks until a '\r' is seen.
+  __m128i m0 = _mm_set1_epi8(13);              // vector of 16 copies of byte 13 ('\r')
+
+  while (1)
+  {
+    __m128i v0 = _mm_loadu_si128((const __m128i *)buf);  // unaligned 16-byte load
+    __m128i v1 = _mm_cmpeq_epi8(v0, m0);    // compare all 16 chars
+    unsigned int vmask = _mm_movemask_epi8(v1);      // get 16 comparison result bits
+    if (vmask != 0) {
+        buf += TZCNT(vmask) + 2;  // index of the first '\r' plus 2 (presumably to step over "\r\n")
+        break;                              // found a '\r', break out of loop
+    }
+    buf += 16;                                // next 16 characters...
+  }
+  return buf;
+}
+
+ // AVX2 variant of the end-of-line scan: one 256-bit vector covers 32 bytes per iteration.
+static const char *my_get_eol(const char *buf) {  // Return the position two bytes past the first byte 13 ('\r') at or after buf.
+
+  __m256i m13 = _mm256_set1_epi8(13);             // 32 copies of '\r'
+  while (1)  // NOTE(review): no end pointer; keeps loading 32-byte blocks until a '\r' is seen
+  {
+    __m256i v0 = _mm256_loadu_si256((const __m256i *)buf);  // unaligned 32-byte load
+    __m256i v1 = _mm256_cmpeq_epi8(v0, m13);     
+    unsigned long vmask = _mm256_movemask_epi8(v1);  // one bit per byte: bit k set when buf[k] == '\r'
+    if (vmask != 0) {
+        buf += TZCNT(vmask) + 2;  // index of the first '\r' plus 2 (presumably to step over "\r\n")
+        break;                             
+    }
+    buf += 32;                  // no '\r' in this block: next 32 bytes
+  }
+  return buf;
+}
+  __m256i m32 = _mm256_set1_epi8(32);             // file-scope constant despite the indentation: 32 copies of ' ' (ASCII 32)
+static const char *get_to_space(const char *buf, int *len) {  // Find the first ' ' at or after buf; returns the position just past it and stores the number of bytes before it in *len.
+  const char *orig = buf;
+  while (1)  // NOTE(review): no end pointer; keeps loading 32-byte blocks until a space is seen
+  {
+    __m256i v0 = _mm256_loadu_si256((const __m256i *)buf);
+    __m256i v1 = _mm256_cmpeq_epi8(v0, m32);     
+    unsigned long vmask = _mm256_movemask_epi8(v1);  // one bit per byte: bit k set when buf[k] == ' '
+    if (vmask != 0) {
+        buf += TZCNT(vmask) + 1;  // step to one past the space
+        break;                             
+    }
+    buf += 32; 
+  }
+  *len = buf-orig-1;  // minus 1 because buf already points past the space
+  return buf;
+}
+
+__m256i m59 = _mm256_set1_epi8(59);  // 32 copies of ';' (ASCII 59)
+__m256i m61 = _mm256_set1_epi8(61);  // 32 copies of '=' (ASCII 61)
+static int getSession_avx2( const char* buf, const char* buf_end ) {  // AVX2 scan of "name=value; ..." text: returns the length of the field following a 9-byte name that begins with "mrse", or 0 if none is seen.
+  unsigned int msk;  // per-block bitmap: bit k set when block byte k is ';' or '='
+  int i=0,tz; // 32B index
+  int cnt = 0;  // NOTE(review): never used
+  unsigned int shifted;  // offset of buf inside the current 32-byte block
+  const char *sbuf = buf;  // start of the field (name or value) currently being scanned
+  const char *obuf = buf;  // start of the whole input
+  int name_or_value = 0;  // 0 while scanning a name, 1 while scanning a value
+  int found = 0;  // set once the matching name has been seen
+
+  do {
+    const char *block_start = obuf+32*i;
+    __m256i b0 = _mm256_loadu_si256((const __m256i *) block_start);  // NOTE(review): always loads a full 32 bytes, not clamped to buf_end
+    msk = _mm256_movemask_epi8( _mm256_or_si256(_mm256_cmpeq_epi8(b0, m59), _mm256_cmpeq_epi8(b0, m61) ) );
+    while (1) {
+
+      //if ( buf >= buf_end ) { goto sesdone; }
+      shifted = buf-block_start;
+      if ( shifted >= 32 ) break;  // buf has moved beyond this block
+      tz = TZCNT((msk >> shifted));  // distance from buf to the next delimiter in this block
+      if ( tz < 32 ) {  // presumably TZCNT yields >= 32 when no bit is set - confirm against the macro
+        buf += tz;
+        //printf( " fnd >%.*s<\n", buf-sbuf, sbuf );  
+        if ( buf >= buf_end ) { goto sesdone; }
+        if ( name_or_value == 1 ) {
+          if ( *buf == '=' ) { buf += 1; continue; } // = in value field
+          if ( found ) {
+            //printf( " done >%.*s<\n", buf-sbuf, sbuf );  
+            return buf-sbuf;  // length of the value that followed the matching name
+          }
+          buf+=1;  // together with the buf += 1 below, two bytes are skipped after ';' (presumably "; ")
+          name_or_value = 0;
+        } else {
+          if ( buf-sbuf == 9 && ( *((unsigned int *)(sbuf)) == CHAR4_TO_INT('m', 'r', 's','e') ) ) {  // 9-byte name whose first 4 bytes are "mrse" (presumably "mrsession"; bytes 4..8 are not compared)
+            found = 1;
+          }
+          name_or_value = 1;
+        }
+        buf += 1;  // step over the delimiter
+        sbuf = buf;
+      } else {
+        buf += 32 - shifted;  // no further delimiter in this block: jump to the next block boundary
+        break;
+      }
+
+    }
+    i+=1;
+    if ( buf >= buf_end ) { goto sesdone; }
+  } while ( buf-obuf < buf_end-obuf );
+
+sesdone:
+  if ( found ) {
+    //printf( " sesdone >%.*s<\n", buf-sbuf, sbuf );  
+    return buf-sbuf;  // NOTE(review): buf may have been advanced past buf_end by the block jump above, so this length may overshoot - confirm
+  }
+  return 0;  // not found (getSession returns -1 in the same situation)
+}
+
 
 static const char *parse_headers(const char *buf, const char *buf_end, int *ret)
 {
@@ -227,7 +429,8 @@ static const char *parse_headers(const char *buf, const char *buf_end, int *ret)
               //if ( buf[0] == 'a' && buf[13] == 'r' ) { //"application/mrpacker"
                 //mrr->flags = 2;
               //} 
-              buf = get_token_to_eol(buf, buf_end, ret); 
+              //buf = get_token_to_eol(buf, buf_end, ret); 
+              buf = my_get_eol(buf);
               goto skipvalue;
             }
             if ( buf[13] == ':' ) { // Cache-Control:
@@ -247,7 +450,8 @@ static const char *parse_headers(const char *buf, const char *buf_end, int *ret)
               //headers[*num_headers].name_len = 16;
               buf += 18;
               //mrr->ip = buf;
-              buf = get_token_to_eol(buf, buf_end, ret); 
+              //buf = get_token_to_eol(buf, buf_end, ret); 
+              buf = my_get_eol(buf);
               //mrr->ip_len = headers[*num_headers].value_len;
               goto skipvalue;
             }
@@ -274,7 +478,8 @@ static const char *parse_headers(const char *buf, const char *buf_end, int *ret)
               //headers[*num_headers].name_len = 9;
               buf += 11;
               //mrr->ip = buf;
-              buf = get_token_to_eol(buf, buf_end, ret); 
+              //buf = get_token_to_eol(buf, buf_end, ret); 
+              buf = my_get_eol(buf);
               //mrr->ip_len = headers[*num_headers].value_len;
               goto skipvalue;
             }
@@ -441,7 +646,8 @@ static const char *parse_headers(const char *buf, const char *buf_end, int *ret)
             //headers[*num_headers].name_len = 0;
         }
 hvalue:
-        if ((buf = get_token_to_eol(buf, buf_end, ret)) == NULL) {
+        //if ((buf = get_token_to_eol(buf, buf_end, ret)) == NULL) {
+        if ((buf = my_get_eol(buf)) == NULL) {
             return NULL;
         }
 skipvalue:
@@ -631,7 +837,7 @@ static const char* parse_headers_avx2(const char* buf, const char* buf_end, int*
       tz = TZCNT(bitmap);
       if ( tz < 64 ) { // tz is 64 if not found
         p += tz;
-        printf( " fnd >%.*s<\n", p-buf, buf );  
+        //printf( " fnd >%.*s<\n", p-buf, buf );  
         if ( state == 0 ) { // :
           state = 1;
           p += 2; buf = p;
@@ -661,44 +867,7 @@ wedone:
 }
 
 
-static const char *my_get_eol128(const char *buf) {
-  //__m128i* pSrc1 = (__m128i *)string;         // init pointer to start of string
-  __m128i m0 = _mm_set1_epi8(13);              // vector of 16 `\0` characters
-
-  while (1)
-  {
-    __m128i v0 = _mm_loadu_si128((const __m128i *)buf);
-    __m128i v1 = _mm_cmpeq_epi8(v0, m0);    // compare all 16 chars
-    unsigned int vmask = _mm_movemask_epi8(v1);      // get 16 comparison result bits
-    if (vmask != 0) {
-        buf += TZCNT(vmask) + 2;
-        break;                              // we found a `\0`, break out of loop
-    }
-    buf += 16; //pSrc1++;                                // next 16 characters...
-  }
-  return buf;
-}
-
- //64bits  256bits  bytes 8 * 32 
-__m256i m13 = _mm256_set1_epi8(13);             
-
-static const char *my_get_eol(const char *buf) {
 
-  while (1)
-  {
-    __m256i v0 = _mm256_loadu_si256((const __m256i *)buf);
-    __m256i v1 = _mm256_cmpeq_epi8(v0, m13);     
-    unsigned long vmask = _mm256_movemask_epi8(v1);  
-    if (vmask != 0) {
-        buf += TZCNT(vmask) + 2;
-        break;                             
-    }
-    buf += 32; //pSrc1++;                 
-  }
-  return buf;
-}
-
-//__m256i m13 = _mm256_set1_epi8(13);
 __m256i m58 = _mm256_set1_epi8(58);   //  0x1313131313131313...
                                       //  0x32333435363713   //  abcdef\r
                                       //  32 bit number 0x40
@@ -708,6 +877,7 @@ static void parse_mine( const char* buf ) {
 
   //__m256i b0,b1,b2,b3,b4,b5,b6,b7,b8,b9,b10,b11,b12,b13,b14,b15;
   __m256i b0,b1,b2,b3,b4,b5,b6,b7;
+  __m256i m13 = _mm256_set1_epi8(13);
 
   b0 = _mm256_loadu_si256((const __m256i *) (buf + 32*0)); // buf[0]
   b1 = _mm256_loadu_si256((const __m256i *) (buf + 32*1)); // buf[32]
@@ -803,6 +973,7 @@ static void parse_mine3( const char* buf ) {
 
   //__m256i b0,b1,b2,b3,b4,b5,b6,b7,b8,b9,b10,b11,b12,b13,b14,b15;
   __m256i b0,b1,b2,b3,b4,b5,b6,b7;
+  __m256i m13 = _mm256_set1_epi8(13);
 
   const char *obuf = buf;
   const char *sbuf = buf;
@@ -873,7 +1044,7 @@ new512:
         } else {
           name_or_value = 1;
         }
-        printf( " fnd >%.*s<\n", buf-sbuf, sbuf );  
+        //printf( " fnd >%.*s<\n", buf-sbuf, sbuf );  
         buf += 2; if ( *buf == '\r' ) break; // \r\n\r\n marks the end
         sbuf = buf;
         if ( (buf-block_start)> 64 ) break; // TODO?
@@ -894,25 +1065,34 @@ done:
   i += 1;
 }
 
-static void parse_mine2( const char* buf ) {
-  unsigned int msk;
+static void parse_mine2( const char* buf, const char *buf_end ) {
+  unsigned long msk;
   int i=0,tz; // 32B index
   int cnt = 0;
   unsigned int shifted;
   const char *sbuf = buf;
   const char *obuf = buf;
   int name_or_value = 0;
+  __m256i m13 = _mm256_set1_epi8(13);
 
   do {
-    const char *block_start = obuf+32*i;
+    const char *block_start = obuf+64*i;
     __m256i b0 = _mm256_loadu_si256((const __m256i *) block_start);
-    msk = _mm256_movemask_epi8( _mm256_or_si256(_mm256_cmpeq_epi8(b0, m13), _mm256_cmpeq_epi8(b0, m58) ) );
+    __m256i b1 = _mm256_loadu_si256((const __m256i *) (block_start+32));
+    msk = (unsigned int) _mm256_movemask_epi8( _mm256_or_si256(_mm256_cmpeq_epi8(b0, m13), _mm256_cmpeq_epi8(b0, m58) ) )  |
+        ((unsigned long) _mm256_movemask_epi8( _mm256_or_si256(_mm256_cmpeq_epi8(b1, m13), _mm256_cmpeq_epi8(b1, m58) ) ) << 32);
+
+    //const char *block_start = obuf+32*i;
+    //__m256i b0 = _mm256_loadu_si256((const __m256i *) block_start);
+    //msk = _mm256_movemask_epi8( _mm256_or_si256(_mm256_cmpeq_epi8(b0, m13), _mm256_cmpeq_epi8(b0, m58) ) );
     while (1) {
 
       shifted = buf-block_start;
-      if ( shifted >= 32 ) break;
+      if ( shifted >= 64 ) break;
       tz = TZCNT((msk >> shifted));
-      if ( tz < 32 ) {
+      //printf("DELME mski %016llx shift %d\n", msk, shifted );
+      //printf("DELME shft %016llx\n", msk>>shifted );
+      if ( tz < 64 ) {
         buf += tz;
         if ( name_or_value == 1 ) {
           if ( *buf == ':' ) { buf += 1; continue; } // : in value field
@@ -921,16 +1101,17 @@ static void parse_mine2( const char* buf ) {
           name_or_value = 1;
         }
         printf( " fnd >%.*s<\n", buf-sbuf, sbuf );  
-        buf += 2; if ( *buf == '\r' ) break; // \r\n\r\n marks the end
+        buf += 2; if ( *buf == '\r' ) return; // \r\n\r\n marks the end
         sbuf = buf;
       } else {
-        buf += 32 - shifted;
+        buf += 64 - shifted;
         break;
       }
 
     }
     i+=1;
-  } while ( *buf != '\r' );
+  } while ( buf < buf_end );
+  //} while ( *buf != '\r' );
 }
 
 static void parse_sse4( const char* buf ) {
@@ -945,15 +1126,26 @@ static void parse_mysse4( const char* buf ) {
     buf = my_get_eol( buf );
   }
 }
-/*
 static char buf[8096] = "Host: server\r\n"
 "User-Agent: Mozilla/5.0 (X11; Linux x86_64) Gecko/20130501 Firefox/30.0 AppleWebKit/600.00 Chrome/30.0.0000.0 Trident/10.0 Safari/600.00\r\n"
 "Cookie: uid=12345678901234567890; __utma=1.1234567890.1234567890.1234567890.1234567890.12; wd=2560x1600\r\n"
 "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,* /*;q=0.8\r\n"
+"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,* /*;q=0.8\r\n"
+"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,* /*;q=0.8\r\n"
+"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,* /*;q=0.8\r\n"
+"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,* /*;q=0.8\r\n"
 "Accept-Language: en-US,en;q=0.5\r\n"
 "Connection: keep-alive\r\n\r\n";
 static char buf2[8096] = "Host: localhost:8080\r\nUser-Agent: python-requests/2.31.0\r\nAccept-Encoding: gzip, deflate\r\nAccept: * /*\r\nConnection: keep-alive\r\nCookie: foo=b=ar\r\nContent-Length: 0\r\n\r\n";
+static char path[8096] = "/foo/bar/bazfdasfffffffffffffffffffffffffffffffffffffffdfffffffffffffffffffffffffffffffffffffffffffffffffff ";
+
+//static  char cbuf[8096] = "uid=123456781234567890; mrsession=1234567890.1234567890.12; wd=2560x1600";
+//static  int  clen= strlen("uid=123456781234567890; mrsession=1234567890.1234567890.12; wd=2560x1600");
+static  char cbuf[8096] = "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa; mrsession=1234567890.1234567890.12; xxxxxxxxxxxxxxxxxxxxxxxxx=zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz";
+static  int  clen= strlen("zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa; mrsession=1234567890.1234567890.12; xxxxxxxxxxxxxxxxxxxxxxxxx=zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz");
+static  const char *cend = cbuf + clen;
 
+/*
 static void BM_SlowParse(benchmark::State& state) {
   // Perform setup here
   std::string text = "Host: server\n"
@@ -984,82 +1176,94 @@ static void BM_my_get_eol(benchmark::State& state) {
 }
 
 static void BM_my_header_parse(benchmark::State& state) {
-  // Perform setup here
   for (auto _ : state) {
-    // This code gets timed
     parse_mine(buf);
   }
 }
 static void BM_my2_header_parse(benchmark::State& state) {
-  // Perform setup here
   for (auto _ : state) {
-    // This code gets timed
     parse_mine2(buf);
   }
 }
 static void BM_my3_header_parse(benchmark::State& state) {
-  // Perform setup here
   for (auto _ : state) {
-    // This code gets timed
     parse_mine3(buf);
   }
 }
 
 
 static void BM_old_header_parse(benchmark::State& state) {
-  // Perform setup here
   int ret = 0;
   for (auto _ : state) {
-    // This code gets timed
-    parse_headers(buf,buf+512,&ret);
+    parse_headers(buf,buf+2048,&ret);
   }
 }
 
 static void BM_avx2_header_parse(benchmark::State& state) {
-  // Perform setup here
   int ret = 0;
   for (auto _ : state) {
-    // This code gets timed
-    parse_headers_avx2(buf,buf+512,&ret);
+    parse_headers_avx2(buf,buf+2048,&ret);
+  }
+}
+
+static void BM_adv_token(benchmark::State& state) {
+  int ret = 0;
+  int path_len = 0;
+  for (auto _ : state) {
+    adv_token(path, &path_len);
+  }
+}
+static void BM_adv_token_avx2(benchmark::State& state) {
+  int ret = 0;
+  int path_len = 0;
+  for (auto _ : state) {
+    get_to_space(path, &path_len);
   }
 }
+static void BM_get_session(benchmark::State& state) {
+  int ret = 0;
+  for (auto _ : state) {
+    getSession(cbuf, clen);
+  }
+}
+static void BM_get_session_avx2(benchmark::State& state) {
+  int ret = 0;
+  for (auto _ : state) {
+    getSession_avx2(cbuf, cend);
+  }
+}
+
+
+
 
 
 
 
 //BENCHMARK(BM_SlowParse);
-BENCHMARK(BM_sse4_get_eol);
-BENCHMARK(BM_my_get_eol);
-BENCHMARK(BM_my3_header_parse);
-BENCHMARK(BM_my2_header_parse);
+//BENCHMARK(BM_sse4_get_eol);
+//BENCHMARK(BM_my_get_eol);
+//BENCHMARK(BM_my3_header_parse);
+//BENCHMARK(BM_my2_header_parse);
 //BENCHMARK(BM_my_header_parse);
-BENCHMARK(BM_old_header_parse);
-BENCHMARK(BM_avx2_header_parse);
+//BENCHMARK(BM_old_header_parse);
+//BENCHMARK(BM_avx2_header_parse);
+//BENCHMARK(BM_adv_token);
+//BENCHMARK(BM_adv_token_avx2);
+//BENCHMARK(BM_get_session);
+//BENCHMARK(BM_get_session_avx2);
 BENCHMARK_MAIN();
 
-*/
 
+*/
 int main() {
-  char buf[8096] = "Host: server\r\n"
-"User-Agent: Mozilla/5.0 (X11; Linux x86_64) Gecko/20130501 Firefox/30.0 AppleWebKit/600.00 Chrome/30.0.0000.0 Trident/10.0 Safari/600.00\r\n"
-"Cookie: uid=12345678901234567890; __utma=1.1234567890.1234567890.1234567890.1234567890.12; wd=2560x1600\r\n"
-"Cookie: uid=12345678901234567890; __utma=1.1234567890.1234567890.1234567890.1234567890.12; wd=2560x1600\r\n"
-"Cookie: uid=12345678901234567890; __utma=1.1234567890.1234567890.1234567890.1234567890.12; wd=2560x1600\r\n"
-"Cookie: uid=12345678901234567890; __utma=1.1234567890.1234567890.1234567890.1234567890.12; wd=2560x1600\r\n"
-"Cookie: uid=12345678901234567890; __utma=1.1234567890.1234567890.1234567890.1234567890.12; wd=2560x1600\r\n"
-"Cookie: uid=12345678901234567890; __utma=1.1234567890.1234567890.1234567890.1234567890.12; wd=2560x1600\r\n"
-"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,* /*;q=0.8\r\n"
-"Accept-Language: en-US,en;q=0.5\r\n"
-"Connection: keep-alive\r\n\r\n";
-  //strcpy(buf,"Host: localhost:8080\r\nUser-Agent: curl/7.68.0\r\nAccept: * /*\r\n\r\n");
-  //strcpy(buf,"Host: localhost:8080\r\nUser-Agent: python-requests/2.31.0\r\nAccept-Encoding: gzip, deflate\r\nAccept: * /*\r\nConnection: keep-alive\r\nCookie: foo=b=ar\r\nContent-Length: 0\r\n\r\n");
 
+  //getSession_avx2(cbuf,cend);
 
   int ret = 0;
-  parse_headers_avx2(buf,buf+512,&ret);
+  //parse_headers_avx2(buf,buf+512,&ret);
   //parse_headers(buf,buf+2048,&ret);
-  //parse_mine3(buf);
-  printf(" ret=%d\n",ret);
+  parse_mine2(buf, buf+2048);
+  //printf(" ret=%d\n",ret);
 
   //unsigned long long l = 0x80008020ull;
   //unsigned int s = 7;
@@ -1067,4 +1271,3 @@ int main() {
 
 }
 
-
diff --git a/readme b/readme
index dacd313..502e0ac 100644
--- a/readme
+++ b/readme
@@ -5,6 +5,8 @@ Updating version:
   internals app.c and response.c
 
 
+sudo sh -c "echo performance | tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor"
+sudo sh -c "echo powersave | tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor"
 
 
 wrk -t4 -c32 -d2s http://localhost:8080/
@@ -49,15 +51,38 @@ User-Agent: curl/7.51.0
 Accept: */*
 
 <<<
+POST /form HTTP/1.1
+Host: localhost:8080
+User-Agent: python-requests/2.31.0
+Accept-Encoding: gzip, deflate
+Accept: */*
+Connection: keep-alive
+Content-Length: 19
+Content-Type: application/x-www-form-urlencoded
+
+p1=v1&param2=value2POST /form HTTP/1.1
+Host: localhost:8080
+User-Agent: python-requests/2.31.0
+Accept-Encoding: gzip, deflate
+Accept: */*
+Connection: keep-alive
+Content-Length: 19
+Content-Type: application/x-www-form-urlencoded
+
+p1=v1&param2=value2
 
+<<<
 curl http://localhost:8080/
 
 Valgrind doesn't work with AVX2 ... 
 valgrind --tool=memcheck --suppressions=valgrind-python.supp python3 -E -tt ./tst.py
 
+Big headers
+curl http://localhost:8080/ -H 'User-Agent: Mozilla/5.0 (X11; Linux x86_64) Gecko/20130501 Firefox/30.0 AppleWebKit/600.00 Chrome/30.0.0000.0 Trident/10.0 Safari/600.00' -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' -H 'Accept-Language: en-US,en;q=0.5' -H 'Cookie: mrsession=43709dd361cc443e976b05714581a7fb; foo=fdsfdasdfasdfdsfasdfsdfsdfasdfas; short=fazc;' -H 'Connection: keep-alive'
+
 FORM urlencoded TODO more params.  Can we fix the performance here?
-curl -d "param1=value1&param2=value2" -X POST http://localhost:8080/ -H "Content-Type: application/x-www-form-urlencoded"
-wrk -t4 -c32 -d1s http://localhost:8080/form -s tests/lua/form.lua
+curl -d "param1=value1&param2=value2" -X POST http://localhost:8080/form -H "Content-Type: application/x-www-form-urlencoded"
+wrk -t1 -c1 -d1s http://localhost:8080/form -s tests/lua/form.lua
 
 cookie test:
 curl --cookie "mrsession=bnwg23LQbOmdAtcBELdLwsFcyJkN8iGp" http://localhost:8080/
diff --git a/runtests b/runtests
new file mode 100755
index 0000000..5e92ad4
--- /dev/null
+++ b/runtests
@@ -0,0 +1,5 @@
+
+sudo sh -c "echo performance | tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor"
+python dotests.py
+sudo sh -c "echo powersave | tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor"
+
diff --git a/src/mrhttp/app.py b/src/mrhttp/app.py
index a39cd4b..f10d33c 100644
--- a/src/mrhttp/app.py
+++ b/src/mrhttp/app.py
@@ -117,38 +117,6 @@ class Application(mrhttp.CApp):
       self.uses_mrq = True
 
     if not uri.startswith('/'): uri = '/' + uri
-    #params = {}
-    #params["methods"] = methods
-    #params["options"] = options
-    #params["type"] = _type
-    #params["mrq"] = None
-    #for o in options:
-      #if o.startswith("mrq"):
-        #
-        #self.uses_mrq = True
-        #if self._mrq == None:
-          #srvs = self.config.get("mrq", None)
-          #print(srvs)
-          #if type(srvs) != list or type(srvs[0]) != tuple or len(srvs) == 0:
-            #print("When using MrQ app.config['mrq'] must be set to a list of (host,port) tuple pairs. Exiting")
-            #exit(1)
-          #self._mrq = []
-          #if type(srvs) == list and type(srvs[0]) == list:
-            #for s in srvs:
-              #self._mrq.append( MrqClient( s, self.loop) )
-          #else:
-            #self._mrq.append( MrqClient( srvs, self.loop) )
-        #if o == "mrq": 
-          #o = "mrq0"
-        #l = []
-        #try:
-          #for n in o[3:]:
-            #l.append( self._mrq[int(n)] )
-          #params["mrq"] = l
-        #except:
-          #print("Error mrq route specifies a cluster that doesn't exist")
-          #print("uri:", uri, "mrq", o)
-          #exit(1)
 
     def response(func): 
       self.router.add_route( func, uri, methods, options, _type )
@@ -288,29 +256,7 @@ class Application(mrhttp.CApp):
         for r in self.requests:
           r.cleanup()
         self.requests = None
-
     
-        #for ref in gc.get_referrers(self.requests[0]):
-          #if type(ref) == list:
-            #print("list")
-          #else:
-            #print(ref)
-        #print("DELME refcnt ", sys.getrefcount(self.requests[0]))
-        #r = self.requests[0]
-        #print("id requests ", id(self.requests))
-        #rs = self.requests
-        #self.requests = None
-        #gc.collect()
-        #print (gc.get_referrers(rs))
-        #print("DELME refcnt ", sys.getrefcount(r))
-        #for ref in gc.get_referrers(r):
-          #if type(ref) == list:
-            #print("list")
-            #print("id ref ", id(ref))
-          #else:
-            #print(ref)
-
-
   # Update the response date string every few seconds
   def updateDateString(self):
     self.updateDate( format_date_time(None) )
diff --git a/src/mrhttp/internals/faststrcmp.h b/src/mrhttp/internals/faststrcmp.h
index dde16e2..079f55d 100644
--- a/src/mrhttp/internals/faststrcmp.h
+++ b/src/mrhttp/internals/faststrcmp.h
@@ -1,4 +1,6 @@
 
+// TODO: Where did this come from? Identify and credit the original source.
+
 static const unsigned char lct[] __attribute__((aligned(64))) = {
         0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
         0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
diff --git a/src/mrhttp/internals/module.h b/src/mrhttp/internals/module.h
index 1fef96c..293094d 100644
--- a/src/mrhttp/internals/module.h
+++ b/src/mrhttp/internals/module.h
@@ -125,7 +125,7 @@ static PyMemberDef Request_members[] = {
     {"_files", T_OBJECT, offsetof(Request, py_files),  0, NULL},
     {"servers_down",T_OBJECT, offsetof(Request, py_mrq_servers_down),0, NULL},
     {"user",   T_OBJECT, offsetof(Request, py_user),   0, NULL},
-    {"ip",     T_OBJECT, offsetof(Request, py_ip),     0, NULL},
+    //{"ip",     T_OBJECT, offsetof(Request, py_ip),     0, NULL},
     {NULL},
 };
 static PyGetSetDef Request_getset[] = {
diff --git a/src/mrhttp/internals/mrhttpparser.c b/src/mrhttp/internals/mrhttpparser.c
index 7cb867c..c87da03 100644
--- a/src/mrhttp/internals/mrhttpparser.c
+++ b/src/mrhttp/internals/mrhttpparser.c
@@ -73,112 +73,7 @@ static void print_buffer( char* b, int len ) {
     CHECK_END();                                                                                                                   \
     EXPECT_CHAR_NO_CHECK(ch);
 
-static const char *token_char_map = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
-                                    "\0\1\0\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0"
-                                    "\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1"
-                                    "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0"
-                                    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
-                                    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
-                                    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
-                                    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
 
-static const char *findchar(const char *buf, const char *buf_end, const char *ranges, size_t ranges_size, int *found)
-{
-    *found = 0;
-#if __SSE4_2__
-    if (likely(buf_end - buf >= 16)) {
-        __m128i ranges16 = _mm_loadu_si128((const __m128i *)ranges);
-
-        size_t left = (buf_end - buf) & ~15;
-        do {
-            __m128i b16 = _mm_loadu_si128((const __m128i *)buf);
-            int r = _mm_cmpestri(ranges16, ranges_size, b16, 16, _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS);
-            if (unlikely(r != 16)) {
-                buf += r;
-                *found = 1;
-                break;
-            }
-            buf += 16;
-            left -= 16;
-        } while (likely(left != 0));
-    }
-#else
-    /* suppress unused parameter warning */
-    (void)buf_end;
-    (void)ranges;
-    (void)ranges_size;
-#endif
-    return buf;
-}
-
-static const char *get_token_to_eol(const char *buf, const char *buf_end, const char **token, size_t *token_len, int *ret)
-{
-    const char *token_start = buf;
-
-#ifdef __SSE4_2__
-    static const char ranges1[] = "\0\010"
-                                  /* allow HT */
-                                  "\012\037"
-                                  /* allow SP and up to but not including DEL */
-                                  "\177\177"
-        /* allow chars w. MSB set */
-        ;
-    int found;
-    buf = findchar(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found);
-    if (found)
-        goto FOUND_CTL;
-#else
-    /* find non-printable char within the next 8 bytes, this is the hottest code; manually inlined */
-    while (likely(buf_end - buf >= 8)) {
-#define DOIT()                                                                                                                     \
-    do {                                                                                                                           \
-        if (unlikely(!IS_PRINTABLE_ASCII(*buf)))                                                                                   \
-            goto NonPrintable;                                                                                                     \
-        ++buf;                                                                                                                     \
-    } while (0)
-        DOIT();
-        DOIT();
-        DOIT();
-        DOIT();
-        DOIT();
-        DOIT();
-        DOIT();
-        DOIT();
-#undef DOIT
-        continue;
-    NonPrintable:
-        if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
-            goto FOUND_CTL;
-        }
-        ++buf;
-    }
-#endif
-    for (;; ++buf) {
-        CHECK_END();
-        if (unlikely(!IS_PRINTABLE_ASCII(*buf))) {
-            if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
-                goto FOUND_CTL;
-            }
-        }
-    }
-FOUND_CTL:
-    if (likely(*buf == '\015')) {
-        ++buf;
-        EXPECT_CHAR('\012');
-        *token_len = buf - 2 - token_start;
-    } else if (*buf == '\012') {
-        *token_len = buf - token_start;
-        ++buf;
-    } else {
-        *ret = -1;
-        return NULL;
-    }
-    *token = token_start;
-
-    return buf;
-}
-
-#ifdef __AVX2__
 static unsigned long TZCNT(unsigned long long in) {
   unsigned long res;
   asm("tzcnt %1, %0\n\t" : "=r"(res) : "r"(in));
@@ -201,124 +96,12 @@ static int get_len_to_space(const char *buf, const char *buf_end) {
   }
 }
 
-
 static const char *parse_headers_avx2(const char *buf, const char *buf_end, struct mr_header *headers, size_t *num_headers,
                                  size_t max_headers, int *ret, struct mr_request *mrr)
-{
-  unsigned long long msk[8];  // 1 bit for each of 512 bytes matching  : or \r
-
-  //__m256i b0,b1,b2,b3,b4,b5,b6,b7,b8,b9,b10,b11,b12,b13,b14,b15;
-  __m256i b0,b1,b2,b3,b4,b5,b6,b7;
-
-  __m256i m13 = _mm256_set1_epi8(13); // \r
-  __m256i m58 = _mm256_set1_epi8(58); // :
-
-  const char *obuf = buf;
-  const char *sbuf = buf;
-
-  int i;  // msk[i] 
-  int t;
-  unsigned int s = 0;
-  int name_or_value = 0;
-
-  const char *block_start = obuf;
-
-av_new512:
-  i = 0;
-  buf = obuf;
-  if ( buf >= buf_end ) { *ret = -1; return NULL; }
-
-  b0 = _mm256_loadu_si256((const __m256i *) (buf + 32*0)); // buf[0]
-  b1 = _mm256_loadu_si256((const __m256i *) (buf + 32*1)); // buf[32]
-  b2 = _mm256_loadu_si256((const __m256i *) (buf + 32*2)); // buf[64]
-  b3 = _mm256_loadu_si256((const __m256i *) (buf + 32*3)); // buf[96]
-  b4 = _mm256_loadu_si256((const __m256i *) (buf + 32*4)); // buf[128]
-  b5 = _mm256_loadu_si256((const __m256i *) (buf + 32*5));
-  b6 = _mm256_loadu_si256((const __m256i *) (buf + 32*6));
-  b7 = _mm256_loadu_si256((const __m256i *) (buf + 32*7)); // 256 bytes
-
-  msk[0] = (unsigned int)_mm256_movemask_epi8( _mm256_or_si256(_mm256_cmpeq_epi8(b0, m13), _mm256_cmpeq_epi8(b0, m58) ) )  |
-        ((unsigned long) _mm256_movemask_epi8( _mm256_or_si256(_mm256_cmpeq_epi8(b1, m13), _mm256_cmpeq_epi8(b1, m58) ) ) << 32);
-  msk[1] = (unsigned int)_mm256_movemask_epi8( _mm256_or_si256(_mm256_cmpeq_epi8(b2, m13), _mm256_cmpeq_epi8(b2, m58) ) )  |
-        ((unsigned long) _mm256_movemask_epi8( _mm256_or_si256(_mm256_cmpeq_epi8(b3, m13), _mm256_cmpeq_epi8(b3, m58) ) ) << 32);
-  msk[2] = (unsigned int)_mm256_movemask_epi8( _mm256_or_si256(_mm256_cmpeq_epi8(b4, m13), _mm256_cmpeq_epi8(b4, m58) ) )  |
-        ((unsigned long) _mm256_movemask_epi8( _mm256_or_si256(_mm256_cmpeq_epi8(b5, m13), _mm256_cmpeq_epi8(b5, m58) ) ) << 32);
-  msk[3] = (unsigned int)_mm256_movemask_epi8( _mm256_or_si256(_mm256_cmpeq_epi8(b6, m13), _mm256_cmpeq_epi8(b6, m58) ) )  |
-        ((unsigned long) _mm256_movemask_epi8( _mm256_or_si256(_mm256_cmpeq_epi8(b7, m13), _mm256_cmpeq_epi8(b7, m58) ) ) << 32);
-
-  b0 = _mm256_loadu_si256((const __m256i *) (buf + 32*8));
-  b1 = _mm256_loadu_si256((const __m256i *) (buf + 32*9));
-  b2 = _mm256_loadu_si256((const __m256i *) (buf + 32*10));
-  b3 = _mm256_loadu_si256((const __m256i *) (buf + 32*11));
-  b4 = _mm256_loadu_si256((const __m256i *) (buf + 32*12));
-  b5 = _mm256_loadu_si256((const __m256i *) (buf + 32*13));
-  b6 = _mm256_loadu_si256((const __m256i *) (buf + 32*14));
-  b7 = _mm256_loadu_si256((const __m256i *) (buf + 32*15));
-
-  msk[4] = (unsigned int)_mm256_movemask_epi8( _mm256_or_si256(_mm256_cmpeq_epi8(b0, m13), _mm256_cmpeq_epi8(b0, m58) ) )  ^
-        ((unsigned long) _mm256_movemask_epi8( _mm256_or_si256(_mm256_cmpeq_epi8(b1, m13), _mm256_cmpeq_epi8(b1, m58) ) ) << 32);
-  msk[5] = (unsigned int)_mm256_movemask_epi8( _mm256_or_si256(_mm256_cmpeq_epi8(b2, m13), _mm256_cmpeq_epi8(b2, m58) ) )  ^
-        ((unsigned long) _mm256_movemask_epi8( _mm256_or_si256(_mm256_cmpeq_epi8(b3, m13), _mm256_cmpeq_epi8(b3, m58) ) ) << 32);
-  msk[6] = (unsigned int)_mm256_movemask_epi8( _mm256_or_si256(_mm256_cmpeq_epi8(b4, m13), _mm256_cmpeq_epi8(b4, m58) ) )  ^
-        ((unsigned long) _mm256_movemask_epi8( _mm256_or_si256(_mm256_cmpeq_epi8(b5, m13), _mm256_cmpeq_epi8(b5, m58) ) ) << 32);
-  msk[7] = (unsigned int)_mm256_movemask_epi8( _mm256_or_si256(_mm256_cmpeq_epi8(b6, m13), _mm256_cmpeq_epi8(b6, m58) ) )  ^
-        ((unsigned long) _mm256_movemask_epi8( _mm256_or_si256(_mm256_cmpeq_epi8(b7, m13), _mm256_cmpeq_epi8(b7, m58) ) ) << 32);
-
-
-  // "Host: server\r\n"
-  do {
-
-    block_start = obuf+64*i;
-
-    while(1) {
-      s = buf-block_start;
-      t = TZCNT((msk[i]>>s));
-      if ( t < 64 ) {
-        buf += t;
-        if ( name_or_value == 1 ) {
-          if ( *buf == ':' ) { buf += 1; continue; } // : in value field
-          headers[*num_headers].value = sbuf;
-          headers[*num_headers].value_len = buf-sbuf;
-          ++*num_headers;
-          if (*num_headers >= max_headers) { *ret = -1; return NULL; }
-          name_or_value = 0;
-          buf += 2; if ( *buf == '\r' ) { goto av_done; } // \r\n\r\n marks the end
-        } else {
-          headers[*num_headers].name = sbuf;
-          headers[*num_headers].name_len = buf-sbuf;
-          name_or_value = 1;
-          buf += 2;
-        }
-        sbuf = buf;
-        if ( (buf-block_start)> 64 ) break; // TODO?
-      } else {
-        buf = block_start + 64;
-        break;
-      }
-
-    }
-
-    i+=1;
-    if ( buf[0] == '\r' ) goto av_done;
-  } while ( i < 8 && buf[0] != '\r' );
-
-  obuf += 512;
-  goto av_new512;
-
-av_done:
-  buf += 2;
-  *ret = 0;
-  return buf;
-}
-
-
-
-static const char *parse_headers_avx2_old(const char *buf, const char *buf_end, struct mr_header *headers, size_t *num_headers,
-                                 size_t max_headers, int *ret, struct mr_request *mrr)
 {
   unsigned long msk;
-  int i=0,tz; // 32B index
-  int shifted;
+  int i=0, tz; // 32B index
+  unsigned int shifted;
   const char *sbuf = buf;
   const char *obuf = buf;
   int name_or_value = 0;
@@ -327,18 +110,20 @@ static const char *parse_headers_avx2_old(const char *buf, const char *buf_end,
   __m256i m58 = _mm256_set1_epi8(58); // :
 
   do {
-    const char *block_start = obuf+32*i; i += 1;
+    const char *block_start = obuf+64*i;
     if ( block_start > buf_end ) { printf("DELME hdr too big\n"); *ret = -1; return NULL; }
+
     __m256i b0 = _mm256_loadu_si256((const __m256i *) block_start);
-    msk = _mm256_movemask_epi8( _mm256_or_si256(_mm256_cmpeq_epi8(b0, m13), _mm256_cmpeq_epi8(b0, m58) ) );
+    __m256i b1 = _mm256_loadu_si256((const __m256i *) (block_start+32));
+    msk = (unsigned int) _mm256_movemask_epi8( _mm256_or_si256(_mm256_cmpeq_epi8(b0, m13), _mm256_cmpeq_epi8(b0, m58) ) )  | 
+        ((unsigned long) _mm256_movemask_epi8( _mm256_or_si256(_mm256_cmpeq_epi8(b1, m13), _mm256_cmpeq_epi8(b1, m58) ) ) << 32);
 
     while (1) {
     
-    // "Host: server\r\n"
-    // Headers end on \r\n\r\n
       shifted = buf-block_start;
+      if ( shifted >= 64 ) break;
       tz = TZCNT((msk >> shifted));
-      if ( tz < 32 ) {
+      if ( tz < 64 ) {
         buf += tz;
 
         if ( name_or_value == 1 ) {
@@ -348,7 +133,7 @@ static const char *parse_headers_avx2_old(const char *buf, const char *buf_end,
           ++*num_headers;
           if (*num_headers >= max_headers) { printf("DELME hdr too many\n"); *ret = -1; return NULL; }
           name_or_value = 0;
-          buf += 2; if ( *buf == '\r' ) { break; } // \r\n\r\n marks the end
+          buf += 2; if ( *buf == '\r' ) { buf+=2; *ret=0; return buf; } // \r\n\r\n marks the end
         } else {
           headers[*num_headers].name = sbuf;
           headers[*num_headers].name_len = buf-sbuf;
@@ -357,298 +142,16 @@ static const char *parse_headers_avx2_old(const char *buf, const char *buf_end,
         }
         sbuf = buf;
       } else {
-        buf += 32 - shifted;
+        buf += 64 - shifted;
         break;
       }
 
     }
-  } while ( *buf != '\r' );
-  buf += 2;
-  *ret = 0;
+    i++;
+  } while ( buf < buf_end );
+  *ret = -1;
   return buf;
 }
-#endif
-
-static const char *parse_headers(const char *buf, const char *buf_end, struct mr_header *headers, size_t *num_headers,
-                                 size_t max_headers, int *ret, struct mr_request *mrr)
-{
-    if ( buf_end <= buf ) {
-      *ret = -2;
-      return NULL;
-    }
-    for (;; ++*num_headers) {
-        CHECK_END();
-        if (*buf == '\015') {
-            ++buf;
-            EXPECT_CHAR('\012');
-            break;
-        } else if (*buf == '\012') {
-            ++buf;
-            break;
-        }
-        if (*num_headers == max_headers) {
-            *ret = -1;
-            return NULL;
-        }
-        //printf(">%.*s<", 10, buf);
-        // Listed small to larger - probably best as most used TODO check bounds
-        switch ( TOLC(*buf) ) {
-          case 'h': // Host
-            headers[*num_headers].name = buf;
-            headers[*num_headers].name_len = 4;
-            buf += 6;
-            goto hvalue;
-          case 'c': 
-            if ( buf[6] == ':' ) { // Cookie:
-              headers[*num_headers].name = buf;
-              headers[*num_headers].name_len = 6;
-              buf += 8;
-              goto hvalue;
-            } 
-            if ( buf[10] == ':' ) { // Connection: 
-              headers[*num_headers].name = buf;
-              headers[*num_headers].name_len = 10;
-              buf += 12;
-              goto hvalue;
-            }
-            if ( buf[11] == ':' ) { // Content-MD5: 
-              headers[*num_headers].name = buf;
-              headers[*num_headers].name_len = 11;
-              buf += 13;
-              goto hvalue;
-            }
-            if ( buf[12] == ':' ) { // Content-Type: 
-              headers[*num_headers].name = buf;
-              headers[*num_headers].name_len = 12;
-              buf += 14;
-              //goto hvalue;
-              if ( buf[0] == 'a' && buf[13] == 'r' ) { //"application/mrpacker"
-                mrr->flags = 2;
-              } 
-              buf = get_token_to_eol(buf, buf_end, &headers[*num_headers].value, &headers[*num_headers].value_len, ret); 
-              goto skipvalue;
-            }
-            if ( buf[13] == ':' ) { // Cache-Control:
-              headers[*num_headers].name = buf;
-              headers[*num_headers].name_len = 13;
-              buf += 15;
-              goto hvalue;
-            }
-            if ( buf[14] == ':' ) { // Content-Length:   
-              headers[*num_headers].name = buf;
-              headers[*num_headers].name_len = 14;
-              buf += 16;
-              goto hvalue;
-            }
-            if ( buf[16] == ':' ) { // CF-Connecting-IP
-              headers[*num_headers].name = buf;
-              headers[*num_headers].name_len = 16;
-              buf += 18;
-              mrr->ip = buf;
-              buf = get_token_to_eol(buf, buf_end, &headers[*num_headers].value, &headers[*num_headers].value_len, ret); 
-              mrr->ip_len = headers[*num_headers].value_len;
-              goto skipvalue;
-            }
-            break;
-            //printf( "%.*s\n" , 10, buf);
-            //printf( "Host: %08x == %08x\n" , CHAR4_TO_INT('o', 's', 't',':'), *((unsigned int *)(buf+1)));
-          case 'd':
-            if ( buf[4] == ':' ) { // Date:
-              headers[*num_headers].name = buf;
-              headers[*num_headers].name_len = 4;
-              buf += 6;
-              goto hvalue;
-            }
-            if ( buf[3] == ':' ) { // DNT:       
-              headers[*num_headers].name = buf;
-              headers[*num_headers].name_len = 3;
-              buf += 5;
-              goto hvalue;
-            }
-            break;
-          case 'x':
-            if ( buf[9] == ':' ) { // X-Real-IP
-              headers[*num_headers].name = buf;
-              headers[*num_headers].name_len = 9;
-              buf += 11;
-              mrr->ip = buf;
-              buf = get_token_to_eol(buf, buf_end, &headers[*num_headers].value, &headers[*num_headers].value_len, ret); 
-              mrr->ip_len = headers[*num_headers].value_len;
-              goto skipvalue;
-            }
-            if ( buf[15] == ':' ) { // X-Forwarded-For:       
-              headers[*num_headers].name = buf;
-              headers[*num_headers].name_len = 15;
-              buf += 17;
-              mrr->ip = buf;
-              buf = get_token_to_eol(buf, buf_end, &headers[*num_headers].value, &headers[*num_headers].value_len, ret); 
-              mrr->ip_len = headers[*num_headers].value_len;
-              goto skipvalue;
-              //goto hvalue;
-            }
-            if ( buf[16] == ':' ) { // X-Forwarded-Host:       
-              headers[*num_headers].name = buf;
-              headers[*num_headers].name_len = 16;
-              buf += 18;
-              goto hvalue;
-            }
-            break;
-          case 'f':
-            if ( buf[5] == ':' ) { // From:
-              headers[*num_headers].name = buf;
-              headers[*num_headers].name_len = 5;
-              buf += 7;
-              goto hvalue;
-            }
-            if ( buf[9] == ':' ) { // Forwarded:     
-              headers[*num_headers].name = buf;
-              headers[*num_headers].name_len = 9;
-              buf += 11;
-              goto hvalue;
-            }
-            break;
-          case 'i': 
-            if ( buf[13] == ':' ) { // If-None-Match:  
-              headers[*num_headers].name = buf;
-              headers[*num_headers].name_len = 13;
-              buf += 15;
-              goto hvalue;
-            }
-            if ( buf[17] == ':' ) { // If-Modified-Since:  
-              headers[*num_headers].name = buf;
-              headers[*num_headers].name_len = 17;
-              buf += 19;
-              goto hvalue;
-            }
-            break;
-          case 'o':
-            headers[*num_headers].name = buf;
-            headers[*num_headers].name_len = 6;
-            buf += 8;
-            goto hvalue;
-          case 'r':
-            headers[*num_headers].name = buf;
-            headers[*num_headers].name_len = 7;
-            buf += 9;
-            goto hvalue;
-          case 't': // Transfer-Encoding:
-            headers[*num_headers].name = buf;
-            headers[*num_headers].name_len = 17;
-            buf += 19;
-            goto hvalue;
-          case 'u':
-            if ( buf[10] == ':' ) { // User-Agent:     
-              headers[*num_headers].name = buf;
-              headers[*num_headers].name_len = 10;
-              buf += 12;
-              goto hvalue;
-            }
-            if ( buf[25] == ':' ) { // Upgrade-Insecure-Requests:     
-              headers[*num_headers].name = buf;
-              headers[*num_headers].name_len = 25;
-              buf += 27;
-              goto hvalue;
-            }
-            break;
-          case 'a':
-            if ( buf[6] == ':' ) { // Accept: 
-              headers[*num_headers].name = buf;
-              headers[*num_headers].name_len = 6;
-              buf += 8;
-              goto hvalue;
-            }
-            if ( buf[13] == ':' ) { // Authorization:   
-              headers[*num_headers].name = buf;
-              headers[*num_headers].name_len = 13;
-              buf += 15;
-              goto hvalue;
-            }
-            if ( buf[14] == ':' ) { // Accept-Charset:           
-              headers[*num_headers].name = buf;
-              headers[*num_headers].name_len = 14;
-              buf += 16;
-              goto hvalue;
-            }
-            if ( buf[15] == ':' ) { // Accept-Encoding: -Datetime
-              headers[*num_headers].name = buf;
-              headers[*num_headers].name_len = 15;
-              buf += 17;
-              goto hvalue;
-            }
-            if ( buf[16] == ':' ) { // Accept-Language:
-              headers[*num_headers].name = buf;
-              headers[*num_headers].name_len = 15;
-              buf += 17;
-              goto hvalue;
-            }
-            if ( buf[29] == ':' ) { // Access-Control-Request-Method:     
-              headers[*num_headers].name = buf;
-              headers[*num_headers].name_len = 29;
-              buf += 31;
-              goto hvalue;
-            }
-            if ( buf[30] == ':' ) { // Access-Control-Request-Headers:     
-              headers[*num_headers].name = buf;
-              headers[*num_headers].name_len = 30;
-              buf += 32;
-              goto hvalue;
-            }
-            break;
-
-        }
-        if (!(*num_headers != 0 && (*buf == ' ' || *buf == '\t'))) {
-            /* parsing name, but do not discard SP before colon, see
-             * http://www.mozilla.org/security/announce/2006/mfsa2006-33.html */
-            headers[*num_headers].name = buf;
-            static const char ALIGNED(16) ranges1[] = "\x00 "  /* control chars and up to SP */
-                                                      "\"\""   /* 0x22 */
-                                                      "()"     /* 0x28,0x29 */
-                                                      ",,"     /* 0x2c */
-                                                      "//"     /* 0x2f */
-                                                      ":@"     /* 0x3a-0x40 */
-                                                      "[]"     /* 0x5b-0x5d */
-                                                      "{\377"; /* 0x7b-0xff */
-            int found;
-            buf = findchar(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found);
-            if (!found) {
-                CHECK_END();
-            }
-            while (1) {
-                if (*buf == ':') {
-                    break;
-                } else if (!token_char_map[(unsigned char)*buf]) {
-                    *ret = -1;
-                    return NULL;
-                }
-                ++buf;
-                CHECK_END();
-            }
-            if ((headers[*num_headers].name_len = buf - headers[*num_headers].name) == 0) {
-                *ret = -1;
-                return NULL;
-            }
-            ++buf;
-            for (;; ++buf) {
-                CHECK_END();
-                if (!(*buf == ' ' || *buf == '\t')) {
-                    break;
-                }
-            }
-        } else {
-            headers[*num_headers].name = NULL;
-            headers[*num_headers].name_len = 0;
-        }
-hvalue:
-        if ((buf = get_token_to_eol(buf, buf_end, &headers[*num_headers].value, &headers[*num_headers].value_len, ret)) == NULL) {
-            return NULL;
-        }
-skipvalue:
-      ;
-    }
-    return buf;
-}
-//#endif
-
 
 static const char *parse_request(const char *buf, const char *buf_end, const char **method, size_t *method_len, const char **path,
                                  size_t *path_len, int *minor_version, struct mr_header *headers, size_t *num_headers,
@@ -701,11 +204,8 @@ static const char *parse_request(const char *buf, const char *buf_end, const cha
         *ret = -2;
         return NULL;
     }
-#ifdef __AVX2__
     return parse_headers_avx2(buf, buf_end, headers, num_headers, max_headers, ret, mrr);
-#else
-    return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret, mrr);
-#endif
+    //return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret, mrr);
 }
 
 static __inline__ unsigned long long rdtsc(void)
diff --git a/src/mrhttp/internals/parser.c b/src/mrhttp/internals/parser.c
index 6e7a6b0..3b9599a 100644
--- a/src/mrhttp/internals/parser.c
+++ b/src/mrhttp/internals/parser.c
@@ -22,6 +22,18 @@ static void print_buffer( char* b, int len ) {
   printf("\n");
 }
 
+static inline bool _isdigit(char c)  { return  c >= '0'  && c <= '9'; }
+static long my_strtol( char* s, int maxlen ) {
+  long l = 0;
+  int n = 0;
+  while (_isdigit(*s)) {
+
+    l = (l * 10) + (*s++ - '0');
+    n += 1;
+    if ( n >= maxlen ) return l;
+  }
+  return l;
+}
 
 static void _reset(Parser* self, bool reset_buffer) {
   self->body_length = 0;
@@ -57,8 +69,6 @@ int parser_data_received(Parser *self, PyObject *py_data, Request *request ) {
   DBG_PARSER printf("parser data\n%.*s\n",(int)datalen, data);
 
   // If we need more space increase the size of the buffer
-  // Can the headers be larger than our buffer size?
-// No, HTTP does not define any limit. However most web servers do limit size of headers they accept. For example in Apache default limit is 8KB, in IIS it's 16K. Server will return 413 Entity Too Large error if headers size exceeds that limit.
   DBG_PARSER printf("parser datalen %zu buflen %ld buffer size %d\n", datalen, (self->end-self->start), self->buf_size);
   if ( unlikely( (datalen+(self->end-self->start)) > self->buf_size) ) {
     while ( (datalen+(self->end-self->start)) > self->buf_size )  self->buf_size *= 2;
@@ -80,12 +90,11 @@ parse_headers:
 
   char *method, *path;
   int rc, minor_version;
-  //struct phr_header headers[100];
-  size_t method_len, path_len;//, num_headers;
+  size_t method_len, path_len;
 
   request->num_headers = 100; // Max allowed headers
   DBG_PARSER printf("before parser requests\n");
-  request->hreq.flags = 0; // TODO clear the mr_request struct
+  //request->hreq.flags = 0; // TODO This isn't currently used in the parser
   rc = mr_parse_request(self->start, self->end-self->start, (const char**)&method, &method_len, (const char**)&path, &path_len, &minor_version, request->headers, &(request->num_headers), &(request->hreq));
 
   DBG_PARSER printf("parser requests rc %d\n",rc);
@@ -107,41 +116,32 @@ parse_headers:
  
   //self->body_length = request->hreq.body_length;
 
-#define header_name_equal(val) \
+#define name_compare(val) \
   header->name_len == strlen(val) && fast_compare(header->name, val, header->name_len) == 0
-#define header_value_equal(val) \
+#define value_compare(val) \
   header->value_len == strlen(val) && fast_compare(header->value, val, header->value_len) == 0
 
  for(struct mr_header* header = request->headers;
       header < request->headers + request->num_headers;
       header++) {
 
-    if(header_name_equal("Content-Type")) {
-      if ( header->value[0] == 'a' && header->value[13] == 'r' ) { //"application/mrpacker"
+    if(name_compare("Content-Type")) {
+      if ( header->value[0] == 'a' && header->value_len == 20 ) { //"application/mrpacker"
         request->hreq.flags = 2;
       } 
     }
-    if(header_name_equal("Content-Length")) {
-      char * endptr = (char *)header->value + header->value_len;
-      self->body_length = strtol(header->value, &endptr, 10);
+    if(name_compare("Content-Length")) {
+      self->body_length = my_strtol(header->value, header->value_len);
 
-      // TODO If the request is too large       
+      // TODO If the request is too large.  I think we already checked
 
-      // 0 means error from strtol, but it is also a valid value
+      // Check for a bad 0 length - ie non digits for the length
       if ( self->body_length == 0 && !( header->value_len == 1 && *(header->value) == '0') ) { 
-        //TODO ERROR
-        //error = invalid_headers;
-        goto error;
-      }
-      // If the value was not all digits we'll error here
-      if(endptr != (char*)header->value + header->value_len) {
-        //TODO ERROR
-        //error = invalid_headers;
         goto error;
       }
 
-    } else if(header_name_equal("Connection")) {
-      if      (header_value_equal("close"))      request->keep_alive = false;
+    } else if(name_compare("Connection")) {
+      if      (value_compare("close"))      request->keep_alive = false;
     }
   } 
 
@@ -151,9 +151,6 @@ parse_headers:
 
   DBG_PARSER printf("body:\n%.*s\n", (int)(self->end-self->start),self->start);
 
-  // No body
-  //if ( self->body_length == 0 ) { }
-
   // Need more data
   if ( self->body_length > ( self->end - self->start ) ) {
     while ( (self->body_length+(self->end-self->start)) > self->buf_size )  self->buf_size *= 2;
@@ -176,9 +173,6 @@ parse_headers:
     }
 
   }
-  if ( request->hreq.ip != NULL ) {
-    request->py_ip = PyUnicode_FromStringAndSize(request->hreq.ip, request->hreq.ip_len);
-  }
 
   if(!Protocol_on_body(self->protocol, self->start, self->body_length)) return -1;
 
diff --git a/src/mrhttp/internals/request.c b/src/mrhttp/internals/request.c
index ffb9306..0a8c241 100644
--- a/src/mrhttp/internals/request.c
+++ b/src/mrhttp/internals/request.c
@@ -1,6 +1,4 @@
 
-
-
 #include <stddef.h>
 #include <sys/param.h>
 #include <strings.h>
@@ -39,7 +37,7 @@
   ((x <= '9' ? 0 : 9) + (x & 0x0f))
 #define is_hex(x) ((x >= '0' && x <= '9') || (x >= 'A' && x <= 'F'))
 
-#define CHAR4_INT(a, b, c, d)         \
+#define CHAR4_TO_INT(a, b, c, d)         \
    (unsigned int)((d << 24) | (c << 16) | (b << 8) | a)
 
 
@@ -54,6 +52,11 @@
 //static PyObject* request;
 
 
+static unsigned long TZCNT(unsigned long long in) {
+  unsigned long res;
+  asm("tzcnt %1, %0\n\t" : "=r"(res) : "r"(in));
+  return res;
+}
 
 
 PyObject* Request_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
@@ -78,7 +81,6 @@ void Request_dealloc(Request* self) {
   Py_XDECREF(self->py_args);
   Py_XDECREF(self->py_path);
   Py_XDECREF(self->py_method);
-  //Py_XDECREF(self->py_ip);
   Py_XDECREF(self->py_json);
   Py_XDECREF(self->py_mrpack);
   Py_XDECREF(self->py_form);
@@ -118,7 +120,6 @@ void Request_reset(Request *self) {
   Py_XDECREF(self->py_file);   self->py_file = NULL;
   Py_XDECREF(self->py_files);  self->py_files= NULL;
   Py_XDECREF(self->py_user);   self->py_user= NULL;
-  self->py_ip   = NULL;
   self->hreq.ip = NULL;
   self->hreq.flags = 0;
   Py_XDECREF(self->py_mrq_servers_down);  self->py_mrq_servers_down= NULL;
@@ -230,9 +231,157 @@ static inline size_t sse_decode(char* path, ssize_t length, size_t *qs_len) {
 }
 //#endif
 
+// Percent-decode `buf` in place and locate the start of the query string.
+//
+// The input is scanned 64 bytes at a time with AVX2 for '%' (37) and '?' (63).
+// Each %XX escape is replaced by the byte it encodes and the text after it is
+// shifted left, so when a '?' is present the query string ends up directly
+// behind the decoded path.
+//
+//   buf    - text to decode; modified in place
+//   len    - number of valid bytes at buf
+//   qs_len - if not NULL, receives the number of bytes from the first '?' to
+//            the end of the input ('?' included); not written when there is
+//            no '?'
+//
+// Returns the length of the decoded text in front of the '?'.
+//
+// The hex digits of an escape are not validated. The vector loads always read
+// whole 64 byte blocks, so memory past buf+len is read (never written).
+static inline int path_decode(char* buf, int len, int *qs_len) {
+  unsigned long long msk;
+  int i=0,tz; // 64B block index
+  unsigned int shifted;
+  char *sbuf = buf;          // start of the literal run that still has to be moved
+  char *obuf = buf;          // start of the input, base for the block addresses
+  char *buf_end = buf+len;
+  char *wbuf = buf;          // write position; only used once found is set
+  int found = 0;             // set after the first escape has been decoded
+
+  // Nothing to decode, and no reason to load a block for an empty input.
+  if ( len <= 0 ) return len;
+
+  __m256i m37 = _mm256_set1_epi8(37); // %
+  __m256i m63 = _mm256_set1_epi8(63); // ?
+
+  do {
+    const char *block_start = obuf+64*i;
+    __m256i b0 = _mm256_loadu_si256((const __m256i *) block_start);
+    __m256i b1 = _mm256_loadu_si256((const __m256i *) (block_start+32));
+    // Bit n of msk is set when byte n of the block is '%' or '?'.
+    msk = (unsigned int) _mm256_movemask_epi8( _mm256_or_si256(_mm256_cmpeq_epi8(b0, m37), _mm256_cmpeq_epi8(b0, m63) ) )  |
+        ((unsigned long long) _mm256_movemask_epi8( _mm256_or_si256(_mm256_cmpeq_epi8(b1, m37), _mm256_cmpeq_epi8(b1, m63) ) ) << 32);
+
+    while (1) {
+
+      shifted = buf-block_start;
+      if ( shifted >= 64 ) break;
+      tz = TZCNT((msk >> shifted)); // distance to the next match, 64 when none is left
+      if ( tz < 64 ) {
+        buf += tz;
+        // A match in the bytes behind the input does not count.
+        if ( buf >= buf_end ) { goto decdone; }
+        if ( *buf == '?' ) {
+          len -= buf_end-buf;
+          if ( qs_len ) *qs_len = buf_end-buf;
+          goto decdone;
+        }
+        if ( *buf == '%' ) {
+          // An escape cut off by the end of the input stays literal; decoding
+          // it would read past the input and push sbuf beyond buf_end.
+          if ( buf_end-buf < 3 ) { goto decdone; }
+          if ( found ) {
+            // Source and destination can overlap, so memcpy is not allowed here.
+            memmove( wbuf, sbuf, buf-sbuf );
+            wbuf += buf-sbuf;
+            *wbuf = (hex_to_dec(buf[1]) << 4) + hex_to_dec(buf[2]);
+            wbuf++;
+          } else {
+            // First escape: everything in front of it is already in place.
+            found = 1;
+            *buf = (hex_to_dec(buf[1]) << 4) + hex_to_dec(buf[2]);
+            wbuf = buf+1;
+          }
+          len -= 2;
+          buf += 3;
+        }
+        sbuf = buf;
+      } else {
+        // No further match in this block, continue with the next one.
+        buf += 64 - shifted;
+        break;
+      }
+
+    }
+    i+=1;
+  } while ( buf < buf_end );
+
+decdone:
+  if ( found ) {
+    // Close the gap left by the escapes; this also moves the query string.
+    memmove( wbuf, sbuf, buf_end-sbuf );
+  }
+  return len;
+}
+
+// Store key -> value in args and release the caller's references to both.
+// When either string could not be created, or the insert fails, the pending
+// Python error is cleared and the pair is dropped.
+static inline void query_args_store( PyObject *args, PyObject *key, PyObject *value ) {
+  if ( key == NULL || value == NULL || PyDict_SetItem(args, key, value) < 0 ) {
+    PyErr_Clear();
+  }
+  Py_XDECREF(key);
+  Py_XDECREF(value);
+}
+
+// Parse an urlencoded "k=v&k2=v2" string into a new dict of str -> str.
+//
+// The input is scanned 64 bytes at a time with AVX2 for '&' and '='. Keys and
+// values are percent-decoded in place with path_decode, so buf is modified.
+//
+//   buf - start of the query string
+//   len - number of valid bytes at buf
+//
+// Returns a new reference to the dict (empty when len is 0), or NULL with an
+// exception set when the dict cannot be allocated.
+//
+// A '=' inside a value is kept as part of the value. A trailing token without
+// '=' has no key and is ignored.
+// NOTE(review): a token without '=' that is followed by '&' is not dropped, it
+// becomes part of the next key ("a&b=1" yields the key "a&b").
+static inline PyObject* parse_query_args( char *buf, size_t len ) {
+  unsigned long long msk;
+  int i=0,tz; // 64B block index
+  int dlen;            // decoded length reported by path_decode
+  unsigned int shifted;
+  char *sbuf = buf;    // start of the key or value being scanned
+  char *obuf = buf;    // start of the input, base for the block addresses
+  char *buf_end = buf+len;
+  int state = 0;       // 0: scanning a key, 1: scanning a value
+  int ignore_me = 0;   // receives path_decode's qs_len, which is not needed here
+  PyObject* key = NULL;
+
+  PyObject* args = PyDict_New();
+  if ( args == NULL ) return NULL;
+  if ( len == 0 ) return args;
+
+  __m256i m38 = _mm256_set1_epi8(38); // &
+  __m256i m61 = _mm256_set1_epi8(61); // =
+  do {
+    const char *block_start = obuf+64*i;
+    __m256i b0 = _mm256_loadu_si256((const __m256i *) block_start);
+    __m256i b1 = _mm256_loadu_si256((const __m256i *) (block_start+32));
+    // Bit n of msk is set when byte n of the block is '&' or '='.
+    msk = (unsigned int) _mm256_movemask_epi8( _mm256_or_si256(_mm256_cmpeq_epi8(b0, m38), _mm256_cmpeq_epi8(b0, m61) ) )  |
+        ((unsigned long long) _mm256_movemask_epi8( _mm256_or_si256(_mm256_cmpeq_epi8(b1, m38), _mm256_cmpeq_epi8(b1, m61) ) ) << 32);
+    while (1) {
+
+      shifted = buf-block_start;
+      if ( shifted >= 64 ) break;
+      tz = TZCNT((msk >> shifted)); // distance to the next match, 64 when none is left
+      if ( tz < 64 ) {
+        buf += tz;
+        // A match in the bytes behind the input does not count.
+        if ( buf >= buf_end ) { goto pdone; }
+        if ( *buf == '=' ) {
+          if ( state == 1 ) { buf+=1; continue; } // '=' inside a value is literal
+          dlen = path_decode( sbuf, buf-sbuf, &ignore_me );
+          key = PyUnicode_FromStringAndSize(sbuf, dlen);
+          // Do not keep an exception pending across later API calls; the
+          // NULL key makes query_args_store drop the pair.
+          if ( key == NULL ) PyErr_Clear();
+
+          state = 1;
+          buf += 1;
+        }
+        else if ( *buf == '&' ) {
+          if ( state == 0 ) { buf+=1; continue; }
+          dlen = path_decode( sbuf, buf-sbuf, &ignore_me );
+          query_args_store( args, key, PyUnicode_FromStringAndSize(sbuf, dlen) );
+          key = NULL; // the reference is gone, never reuse the pointer
+
+          state = 0;
+          buf+=1;
+        }
+        sbuf = buf;
+      } else {
+        // No further match in this block, continue with the next one.
+        buf += 64 - shifted;
+        break;
+      }
+
+    }
+    i+=1;
+  } while ( buf < buf_end );
+
+pdone:
+  // The last value has no terminating '&'. Without a pending key there is
+  // nothing to store it under.
+  if ( state == 1 ) {
+    dlen = path_decode( sbuf, buf_end-sbuf, &ignore_me );
+    query_args_store( args, key, PyUnicode_FromStringAndSize(sbuf, dlen) );
+  }
+  return args;
+}
+
+
+// Percent-decode self->path in place, updating path_len and qs_len.
+// The path_decoded flag makes every call after the first a no-op.
 void request_decodePath(Request* self) {
   if(!self->path_decoded) {
-    self->path_len = sse_decode( self->path, self->path_len, &(self->qs_len) );
+    self->path_len = path_decode( self->path, self->path_len, &(self->qs_len) );
     self->path_decoded = true;
   }
 }
@@ -322,137 +471,149 @@ PyObject* Request_get_headers(Request* self, void* closure) {
   Py_XINCREF(self->py_headers);
   return self->py_headers;
 }
-PyObject* Request_get_ip(Request* self, void* closure) {
-  if(!self->py_ip) {
-    if ( self->hreq.ip_len ) {
-      self->py_ip = PyUnicode_FromStringAndSize(self->hreq.ip, self->hreq.ip_len);
-    } else {
-      self->py_ip = Py_None;
-    }
-  }
-  Py_INCREF(self->py_ip);
-  return self->py_ip;
-}
 
 static inline PyObject* parseCookies( Request* r, char *buf, size_t buflen ) {
-  char *end = buf + buflen;
-  char *last = buf;
+  unsigned int msk;
+  int i=0,tz; // 32B index
+  int cnt = 0;
+  unsigned int shifted;
+  const char *sbuf = buf;
+  const char *obuf = buf;
+  const char *buf_end = buf+buflen;
+  int name_or_value = 0;
+  int found = 0;
+
+  __m256i m59 = _mm256_set1_epi8(59);
+  __m256i m61 = _mm256_set1_epi8(61);
+
   PyObject* cookies = PyDict_New();
   PyObject* key = NULL; PyObject* value = NULL;
 
   DBG printf("parse cookies: %.*s\n",(int)buflen, buf);
 
-  static char ALIGNED(16) ranges1[] = "==" ";;" "\x00 "; // Control chars up to space illegal
-  int found;
-  int state = 0;
-  int grab_session = 0;
-//Cookie: key=session_key; bar=2; nosemi=foo
-  do { 
-    last = buf;
-    buf = findchar(buf, end, ranges1, sizeof(ranges1) - 1, &found);
-    if ( found ) {
-      if ( *buf == '=' ) {
-        if ( state == 0 ) {
-          // Save out the mrsession id 
-          if ( buf-last == 9 && ( *((unsigned int *)(last)) == CHAR4_INT('m', 'r', 's','e') ) ) {
-            DBG printf("Grab session\n");
-            grab_session = 1;
+  do {
+    const char *block_start = obuf+32*i;
+    __m256i b0 = _mm256_loadu_si256((const __m256i *) block_start);
+    msk = _mm256_movemask_epi8( _mm256_or_si256(_mm256_cmpeq_epi8(b0, m59), _mm256_cmpeq_epi8(b0, m61) ) );
+    while (1) {
+      //if ( buf >= buf_end ) { goto sesdone; }
+      shifted = buf-block_start;
+      if ( shifted >= 32 ) break;
+      tz = TZCNT((msk >> shifted));
+      if ( tz < 32 ) {
+        buf += tz;
+        DBG printf( " fnd >%.*s<\n", buf-sbuf, sbuf );  
+        if ( buf >= buf_end ) { goto sesdone; }
+        if ( name_or_value == 1 ) {
+          if ( *buf == '=' ) { buf += 1; continue; } // = in value field
+          if ( found ) {
+            DBG printf("session key %.*s\n", (int)(buf-sbuf), sbuf);
+            r->session_id = sbuf;
+            r->session_id_sz = buf-sbuf;
           }
-          key = PyUnicode_FromStringAndSize(last, buf-last); //TODO error
-          DBG printf("session key %.*s\n", (int)(buf-last), last);
-          state = 1;
+          value = PyUnicode_FromStringAndSize(sbuf, buf-sbuf); //TODO error
+          PyDict_SetItem(cookies, key, value);  //  == -1) goto loop_error;
+          Py_XDECREF(key);
+          Py_XDECREF(value);
           buf+=1;
+          name_or_value = 0;
         } else {
-          // If we're in the value ignore the = so cookie name/value splits on the first =
-          while(found && *buf == '=') buf = findchar(++buf, end, ranges1, sizeof(ranges1) - 1, &found);
-        }
-      } 
-      else if ( *buf == ';' ) {
-        if ( state == 0 ) key  = PyUnicode_FromString("");
-        if (grab_session) {
-          grab_session = 0;
-          r->session_id = last;
-          r->session_id_sz = buf-last;
+          key = PyUnicode_FromStringAndSize(sbuf, buf-sbuf); //TODO error
+          if ( buf-sbuf == 9 && ( *((unsigned int *)(sbuf)) == CHAR4_TO_INT('m', 'r', 's','e') ) ) {
+            found = 1;
+          }
+          name_or_value = 1;
         }
-        value = PyUnicode_FromStringAndSize(last, buf-last); //TODO error
-        DBG printf(" value %.*s\n", (int)(buf-last), last);
-        state = 0;
-        PyDict_SetItem(cookies, key, value);  //  == -1) goto loop_error;
-        Py_XDECREF(key);
-        Py_XDECREF(value);
-        buf+=1;
-        while ( *buf == 32 ) buf++;
-      }
-      else {
-        // Bad character found so skip
-        state = 0;
-        while(found && *buf != ';') buf = findchar(++buf, end, ranges1, sizeof(ranges1) - 1, &found);
-        if ( buf != end ) buf += 1;
-        while ( *buf == 32 ) buf++;
+        buf += 1;
+        sbuf = buf;
+      } else {
+        buf += 32 - shifted;
+        break;
       }
-      //else if(*buf == '%' && is_hex(*(buf + 1)) && is_hex(*(buf + 2))) {
-        //*write = (hex_to_dec(*(buf + 1)) << 4) + hex_to_dec(*(buf + 2));
-        //write+=1;
-        //length -= 2;
-      //}
-    }
-  } while( found );
-
-  // If the trailing ; is left off we need to finish up
-  if (state) {
-    if (grab_session) {
-      grab_session = 0;
-      r->session_id = last;
-      r->session_id_sz = buf-last;
-      DBG printf("session2 %.*s\n", r->session_id_sz, r->session_id);
+
     }
-    value = PyUnicode_FromStringAndSize(last, buf-last); //TODO error
-    PyDict_SetItem(cookies, key, value);  //  == -1) goto loop_error;
+    i+=1;
+    if ( buf >= buf_end ) { goto sesdone; }
+  } while ( buf-obuf < buf_end-obuf );
+
+sesdone:
+  if ( found ) {
+    r->session_id = sbuf;
+    r->session_id_sz = buf_end-sbuf;
+  }
+  if ( name_or_value ) {
+    value = PyUnicode_FromStringAndSize(sbuf, buf_end-sbuf); //TODO error
+    PyDict_SetItem(cookies, key, value); 
     Py_XDECREF(key);
     Py_XDECREF(value);
   }
-
   return cookies;
 }
+
 static inline void getSession( Request* r, char *buf, size_t buflen ) {
-  char *end = buf + buflen;
-  char *last = buf;
+  unsigned int msk;
+  int i=0,tz; // 32B index
+  int cnt = 0;
+  unsigned int shifted;
+  const char *sbuf = buf;
+  const char *obuf = buf;
+  const char *buf_end = buf+buflen;
+  int name_or_value = 0;
+  int found = 0;
+
+  __m256i m59 = _mm256_set1_epi8(59);
+  __m256i m61 = _mm256_set1_epi8(61);
 
-  static char ALIGNED(16) ranges1[] = "==" ";;";
-  int found;
-  int state = 0;
-  do { 
-    last = buf;
-    buf = findchar(buf, end, ranges1, sizeof(ranges1) - 1, &found);
-    if ( found ) {
-      if ( *buf == '=' ) {
-        if ( state == 0 ) {
-          // Save out the mrsession id 
-          if ( buf-last == 9 && ( *((unsigned int *)(last)) == CHAR4_INT('m', 'r', 's','e') ) ) {
-            DBG printf("Grab session\n");
-            state = 1;
+  do {
+    const char *block_start = obuf+32*i;
+    __m256i b0 = _mm256_loadu_si256((const __m256i *) block_start);
+    msk = _mm256_movemask_epi8( _mm256_or_si256(_mm256_cmpeq_epi8(b0, m59), _mm256_cmpeq_epi8(b0, m61) ) );
+    while (1) {
+      //if ( buf >= buf_end ) { goto sesdone; }
+      shifted = buf-block_start;
+      if ( shifted >= 32 ) break;
+      tz = TZCNT((msk >> shifted));
+      if ( tz < 32 ) {
+        buf += tz;
+        //printf( " fnd >%.*s<\n", buf-sbuf, sbuf );  
+        if ( buf >= buf_end ) { goto sesdone; }
+        if ( name_or_value == 1 ) {
+          if ( *buf == '=' ) { buf += 1; continue; } // = in value field
+          if ( found ) {
+            //printf( " done >%.*s<\n", buf-sbuf, sbuf );  
+            r->session_id = sbuf;
+            r->session_id_sz = buf-sbuf;
+            return;
           }
           buf+=1;
-        } 
-      } 
-      else if ( *buf == ';' ) {
-        if (state == 1 ) {
-          r->session_id = last;
-          r->session_id_sz = buf-last;
-          return;
+          name_or_value = 0;
+        } else {
+          if ( buf-sbuf == 9 && ( *((unsigned int *)(sbuf)) == CHAR4_TO_INT('m', 'r', 's','e') ) ) {
+            found = 1;
+          }
+          name_or_value = 1;
         }
-        state = 0;
-        buf+=1;
-        while ( *buf == 32 ) buf++;
+        buf += 1;
+        sbuf = buf;
+      } else {
+        buf += 32 - shifted;
+        break;
       }
+
     }
-  } while( found );
-  if (state) {
-    r->session_id = last;
-    r->session_id_sz = buf-last;
+    i+=1;
+    if ( buf >= buf_end ) { goto sesdone; }
+  } while ( buf-obuf < buf_end-obuf );
+
+sesdone:
+  if ( found ) {
+    r->session_id = sbuf;
+    r->session_id_sz = buf-sbuf;
+    //printf( " sesdone >%.*s<\n", buf-sbuf, sbuf );  
   }
 }
 
+
 static inline PyObject* Request_decode_cookies(Request* self)
 {
   for(struct mr_header* header = self->headers; header < self->headers + self->num_headers; header++) {
@@ -492,67 +653,6 @@ PyObject* Request_get_body(Request* self, void* closure)
   return self->py_body;
 }
 
-static inline PyObject* parse_query_args( char *buf, size_t buflen ) {
-  char *end = buf + buflen;
-  char *last = buf;
-  PyObject* args = PyDict_New();
-
-  if ( buflen == 0 ) return args;
-
-  PyObject* key = NULL; PyObject* value = NULL;
-
-  static char ALIGNED(16) ranges1[] = "==" "&&";
-  int found;
-  int state = 0;
-  int grab_session = 0;
-  size_t len;
-  // foo=bar&key=23%28
-  do { 
-    buf = findchar(buf, end, ranges1, sizeof(ranges1) - 1, &found);
-    if ( found ) {
-      if ( *buf == '=' ) {
-        len = sse_decode( last, buf-last, NULL );
-        key = PyUnicode_FromStringAndSize(last, len); //TODO error
-        state = 1;
-        buf+=1;
-        last = buf;
-      } 
-      else if ( *buf == '&' ) {
-        if ( state == 0 ) key  = PyUnicode_FromString("");
-
-        len = sse_decode( last, buf-last, NULL );
-        value = PyUnicode_FromStringAndSize(last, len);
-        state = 0;
-        PyDict_SetItem(args, key, value);  //  == -1) goto loop_error;
-        Py_XDECREF(key);
-        Py_XDECREF(value);
-        buf+=1;
-        while ( *buf == 32 ) buf++;
-        last = buf;
-      }
-      else {
-        printf(" ERR found not = or ; %.*s\n", 5, buf );
-      }
-    }
-  } while( found );
-
-  if ( buf == end ) {
-    if ( state == 0 ) key  = PyUnicode_FromString("");
-    if ( buf == end && *(buf-1) == ' ' ) {
-      len = sse_decode( last, buf-last-1, NULL );
-      value = PyUnicode_FromStringAndSize(last, len); //TODO error
-    } else {
-      len = sse_decode( last, buf-last, NULL );
-      value = PyUnicode_FromStringAndSize(last, len); //TODO error
-    }
-    state = 0;
-    PyDict_SetItem(args, key, value);  //  == -1) goto loop_error;
-    Py_XDECREF(key);
-    Py_XDECREF(value);
-  }
-
-  return args;
-}
 
 
 PyObject* Request_get_path(Request* self, void* closure)
@@ -608,6 +708,7 @@ PyObject* Request_notfound(Request* self)
   Py_RETURN_NONE;
 }
 
+
 PyObject* Request_parse_mp_form(Request* self) {
 
 
@@ -743,7 +844,7 @@ PyObject* Request_parse_mp_form(Request* self) {
           body = name = filename = content_type = NULL;
 
         }
-        if ( p[2+bndlen] == '-' ) break; // Last boundary
+        if ( p[2+bndlen] == '-' ) break; // Last boundary has -- appended
         state = 1;
       }
     }
@@ -805,8 +906,8 @@ PyObject* Request_parse_mp_form(Request* self) {
     
     //if ( state == 2 ) {
     //}
-
-    p = findchar(p, pend, crlf, sizeof(crlf) - 1, &found);
+    //p = findchar(p, pend, crlf, sizeof(crlf) - 1, &found);
+    p = my_get_eol(p, pend);
     p += 2;
   }
 
diff --git a/src/mrhttp/internals/request.h b/src/mrhttp/internals/request.h
index 0376864..b659096 100644
--- a/src/mrhttp/internals/request.h
+++ b/src/mrhttp/internals/request.h
@@ -45,7 +45,6 @@ struct Request {
   PyObject* py_method;
   PyObject* transport;
   PyObject* app;
-  PyObject* py_ip;
   PyObject* py_headers;
   PyObject* py_cookies;
   PyObject* py_body;
diff --git a/src/mrhttp/internals/router.c b/src/mrhttp/internals/router.c
index 81ca17e..5d4df8b 100644
--- a/src/mrhttp/internals/router.c
+++ b/src/mrhttp/internals/router.c
@@ -178,8 +178,8 @@ Route* router_getRoute(Router* self, Request* request) {
 
   Route *r = self->staticRoutes;
   for (int i = 0; i<self->numStaticRoutes; i++,r++ ) {
-    DBG printf("request path len %d - %.*s\n", (int)request->path_len, (int)request->path_len, request->path);
-    DBG printf("route path %.*s \n", (int)r->len, r->path);
+    //DBG printf("request path len %d - %.*s\n", (int)request->path_len, (int)request->path_len, request->path);
+    //DBG printf("route path %.*s \n", (int)r->len, r->path);
     if ( plen == r->len && !memcmp(r->path, request->path, plen) ) {
       DBG printf("router found path %.*s == %.*s\n", (int)r->len, r->path, (int)request->path_len, request->path);
       return r;
diff --git a/src/mrhttp/internals/utils.c b/src/mrhttp/internals/utils.c
index 792bfbc..1be2952 100644
--- a/src/mrhttp/internals/utils.c
+++ b/src/mrhttp/internals/utils.c
@@ -33,23 +33,13 @@ PyObject* myrandint(PyObject* self, PyObject* args)
 #define unlikely(x) (x)
 #endif
 
-// Valgrind doesn't support mm_cmpestri so replace findchar
-char *valgrind_zfindchar(char *buf, char *buf_end, char *ranges, size_t ranges_size, int *found)
-{
-    //printf("DELME ranges sz %d\n", ranges_size);
-  *found = 0;
-  char *p = buf;
-  while ( p < buf_end ) {
-    for ( int i = 0; i < ranges_size; i += 2 ) {
-      if ( p >= ranges[i] && p <= ranges[i+1] ) {
-        *found = 1;
-        return p;
-    	}
-    }
-    p++;
-  }
-  return p;    
+// Count the trailing zero bits of `in` with the x86 TZCNT instruction.
+// my_get_eol uses it to turn a byte-match bitmask into the offset of the
+// first matching byte.
+// NOTE(review): TZCNT needs BMI1 support on the CPU - confirm the build flags.
+unsigned long TZCNT(unsigned long long in) {
+  unsigned long res;
+  asm("tzcnt %1, %0\n\t" : "=r"(res) : "r"(in));
+  return res;
 }
+
+
 // Search for a range of characters and return a pointer to the location or buf_end if none are found
 char *findchar(char *buf, char *buf_end, char *ranges, size_t ranges_size, int *found)
 {
@@ -91,6 +81,26 @@ char *findchar(char *buf, char *buf_end, char *ranges, size_t ranges_size, int *
     return buf;
 }
 
+// Return a pointer to the first '\r' in [buf, buf_end), or buf_end when there
+// is none.
+//
+// The text is scanned 32 bytes at a time with AVX2. A load can read up to 31
+// bytes past buf_end (never written); a '\r' found there is not reported.
+// Nothing is read when buf is already at or past buf_end.
+char *my_get_eol(char *buf, char *buf_end) {
+  __m256i m13 = _mm256_set1_epi8(13); // '\r'
+  while ( buf < buf_end ) {
+    __m256i v0 = _mm256_loadu_si256((const __m256i *)buf);
+    // Bit n of vmask is set when byte n of the block is '\r'. Keeping it
+    // unsigned avoids sign extension when bit 31 is set.
+    unsigned int vmask = (unsigned int) _mm256_movemask_epi8(_mm256_cmpeq_epi8(v0, m13));
+    if (vmask != 0) {
+      buf += TZCNT(vmask);
+      return buf < buf_end ? buf : buf_end;
+    }
+    buf += 32;
+  }
+  return buf_end;
+}
+
+
 static char escbuf[16*1024];
 
 
diff --git a/src/mrhttp/request.py b/src/mrhttp/request.py
index e793a51..83a33c5 100755
--- a/src/mrhttp/request.py
+++ b/src/mrhttp/request.py
@@ -20,6 +20,7 @@ class Request(mrhttp.CRequest):
   response = mrhttp.Response()
   def __init__(self):
     super().__init__(self)
+    self._ip = None  # cache for the ip property; None until a header supplies a value
     pass
 
   def parsed_content_type(self):
@@ -64,6 +65,18 @@ class Request(mrhttp.CRequest):
         self.parse_mp_form()
     return self._file
 
+  @property
+  def ip(self):
+    """Return the client address taken from a proxy header, or None.
+
+    The headers are tried in the order CF-Connecting-IP, X-Real-IP,
+    X-Forwarded-For and the first value found is cached in self._ip. While
+    none of them is present the lookup is repeated on every access.
+
+    NOTE(review): the header value is returned unmodified and is only
+    trustworthy when a proxy in front of the server sets it.
+    """
+    if self._ip is None:
+      for name in ("CF-Connecting-IP", "X-Real-IP", "X-Forwarded-For"):
+        self._ip = self.headers.get(name)
+        if self._ip is not None:
+          break
+    return self._ip
+
+
   @property
   def files(self):
     if self._files == None:
diff --git a/tests/load_session.py b/tests/load_session.py
new file mode 100644
index 0000000..fd83661
--- /dev/null
+++ b/tests/load_session.py
@@ -0,0 +1,36 @@
+
+import asyncio, time
+import asyncmrcache, mrpacker
+
+import tracemalloc
+tracemalloc.start()
+
+import uvloop
+asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
+
+def lcb(client):
+  """Report a lost connection; handed to create_client as lost_cb."""
+  print("Lost connection")
+
+async def run(loop):
+  """Store one packed test user under two session keys and print it back.
+
+  The same record is written with and without the "mrsession" prefix to the
+  cache server on localhost:7000. The client is closed even when a set or get
+  fails.
+  """
+  rc = await asyncmrcache.create_client( [("localhost",7000)], loop, lost_cb=lcb)
+  try:
+    user = {"user":"test","id":12 }
+    packed = mrpacker.pack(user)
+    keys = (
+      b"mrsession43709dd361cc443e976b05714581a7fb",
+      b"43709dd361cc443e976b05714581a7fb",
+    )
+    for k in keys:
+      await rc.set(k, packed)
+      print(await rc.get(k))
+  finally:
+    await rc.close()
+
+if __name__ == '__main__':
+  loop = asyncio.get_event_loop()
+  try:
+    loop.run_until_complete(run(loop))
+  finally:
+    # Release the loop even when run() raises.
+    loop.close()
+  print("DONE")
+
+
diff --git a/tests/lua/bighdr.lua b/tests/lua/bighdr.lua
new file mode 100755
index 0000000..cf87f16
--- /dev/null
+++ b/tests/lua/bighdr.lua
@@ -0,0 +1,43 @@
+-- wrk script: sends 27 pipelined "GET /" requests per call, each carrying
+-- Content-Type, User-Agent, Cookie, Accept and Accept-Language headers.
+
+local PIPELINE_DEPTH = 27
+
+-- Build the request buffer once; request() hands out the same string each time.
+init = function(args)
+  wrk.headers["Content-Type"] = "application/x-www-form-urlencoded"
+  wrk.headers["User-Agent"] = "Mozilla/5.0 (X11; Linux x86_64) Gecko/20130501 Firefox/30.0 AppleWebKit/600.00 Chrome/30.0.0000.0 Trident/10.0 Safari/600.00"
+  wrk.headers["Cookie"] = "mrsession=43709dd361cc443e976b05714581a7fb; foo=fdsfdasdfasdfdsfasdfsdfsdfasdfas; short=fazc;"
+  wrk.headers["Accept"] = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
+  wrk.headers["Accept-Language"] = "en-US,en;q=0.5"
+
+  local parts = {}
+  for n = 1, PIPELINE_DEPTH do
+    parts[n] = wrk.format(nil, "/")
+  end
+
+  req = table.concat(parts)
+end
+
+-- Called by wrk for every request; returns the prebuilt pipeline buffer.
+request = function()
+   return req
+end
diff --git a/tests/lua/form.lua b/tests/lua/form.lua
index 63b6ce8..8b6278b 100755
--- a/tests/lua/form.lua
+++ b/tests/lua/form.lua
@@ -1,3 +1,3 @@
 wrk.method = "POST"
-wrk.body   = 'param1=value1&param2=value2'
+wrk.body   = 'param1=value1&param2=value2&c%C3%B3mo=puedes&fffffffffffffffffffffffffffffffffffff%20ffffffffffffffffff=aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
 wrk.headers["Content-Type"] = "application/x-www-form-urlencoded"
diff --git a/tests/lua/pipeline.lua b/tests/lua/pipeline.lua
index 5686e19..bc37741 100755
--- a/tests/lua/pipeline.lua
+++ b/tests/lua/pipeline.lua
@@ -18,6 +18,22 @@ init = function(args)
    r[14] = wrk.format(nil, "/")
    r[15] = wrk.format(nil, "/")
    r[16] = wrk.format(nil, "/")
+   r[17] = wrk.format(nil, "/")
+   r[18] = wrk.format(nil, "/")
+   r[19] = wrk.format(nil, "/")
+   r[20] = wrk.format(nil, "/")
+   r[21] = wrk.format(nil, "/")
+   r[22] = wrk.format(nil, "/")
+   r[23] = wrk.format(nil, "/")
+   r[24] = wrk.format(nil, "/")
+   r[25] = wrk.format(nil, "/")
+   r[26] = wrk.format(nil, "/")
+   r[27] = wrk.format(nil, "/")
+   r[28] = wrk.format(nil, "/")
+   r[29] = wrk.format(nil, "/")
+   r[30] = wrk.format(nil, "/")
+   r[31] = wrk.format(nil, "/")
+   r[32] = wrk.format(nil, "/")
    req = table.concat(r)
 end
 
diff --git a/tests/lua/q-form.lua b/tests/lua/q-form.lua
new file mode 100755
index 0000000..a752f57
--- /dev/null
+++ b/tests/lua/q-form.lua
@@ -0,0 +1,31 @@
+-- wrk script: sends 20 pipelined "POST /form" requests per call, all with the
+-- same urlencoded body.
+
+local PIPELINE_DEPTH = 20
+local FORM_BODY = 'param1=value1&param2=value2&c%C3%B3mo=puedes&fffffffffffffffffffffffffffffffffffff%20ffffffffffffffffff=aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
+
+-- Build the request buffer once; request() hands out the same string each time.
+init = function(args)
+   wrk.headers["Content-Type"] = "application/x-www-form-urlencoded"
+
+   local parts = {}
+   for n = 1, PIPELINE_DEPTH do
+      parts[n] = wrk.format('POST', '/form', nil, FORM_BODY)
+   end
+
+   req = table.concat(parts)
+end
+
+-- Called by wrk for every request; returns the prebuilt pipeline buffer.
+request = function()
+   return req
+end
diff --git a/tests/lua/q-form2.lua b/tests/lua/q-form2.lua
new file mode 100755
index 0000000..49d7e1e
--- /dev/null
+++ b/tests/lua/q-form2.lua
@@ -0,0 +1,26 @@
+-- wrk script: sends 15 pipelined "POST /form" requests per call, each with its
+-- own header table and the same urlencoded body.
+
+local PIPELINE_DEPTH = 15
+local FORM_BODY = 'param1=value1&param2=value2&c%C3%B3mo=puedes&fffffffffffffffffffffffffffffffffffff%20ffffffffffffffffff=aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
+
+-- Build the request buffer once; request() hands out the same string each time.
+init = function(args)
+   wrk.headers["Content-Type"] = "application/x-www-form-urlencoded"
+
+   local parts = {}
+   for n = 1, PIPELINE_DEPTH do
+      -- wrk.format writes one "name: value" line per key of the header table,
+      -- so the header has to be a keyed entry. The array form
+      -- {"Content-Type", "..."} produced headers named "1" and "2".
+      -- A fresh table per request, because wrk.format adds entries to it.
+      local headers = { ["Content-Type"] = "application/x-www-form-urlencoded" }
+      parts[n] = wrk.format('POST', '/form', headers, FORM_BODY)
+   end
+
+   req = table.concat(parts)
+end
+
+-- Called by wrk for every request; returns the prebuilt pipeline buffer.
+request = function()
+   return req
+end
diff --git a/tests/lua/q-json.lua b/tests/lua/q-json.lua
new file mode 100755
index 0000000..9e35e33
--- /dev/null
+++ b/tests/lua/q-json.lua
@@ -0,0 +1,26 @@
+-- example script demonstrating HTTP pipelining
+
+init = function(args)
+   local r = {}
+   wrk.headers["Content-Type"] = "application/json; charset=utf-8"
+   r[1]  = wrk.format('POST','/q/1/2/', {"Content-Type", "application/json"}, '{"my":"json"}')
+   r[2]  = wrk.format('POST','/q/1/2/', {"Content-Type", "application/json"}, '{"my":"json"}')
+   r[3]  = wrk.format('POST','/q/1/2/', {"Content-Type", "application/json"}, '{"my":"json"}')
+   r[4]  = wrk.format('POST','/q/1/2/', {"Content-Type", "application/json"}, '{"my":"json"}')
+   r[5]  = wrk.format('POST','/q/1/2/', {"Content-Type", "application/json"}, '{"my":"json"}')
+   r[6]  = wrk.format('POST','/q/1/2/', {"Content-Type", "application/json"}, '{"my":"json"}')
+   r[7]  = wrk.format('POST','/q/1/2/', {"Content-Type", "application/json"}, '{"my":"json"}')
+   r[8]  = wrk.format('POST','/q/1/2/', {"Content-Type", "application/json"}, '{"my":"json"}')
+   r[9]  = wrk.format('POST','/q/1/2/', {"Content-Type", "application/json"}, '{"my":"json"}')
+   r[10] = wrk.format('POST','/q/1/2/', {"Content-Type", "application/json"}, '{"my":"json"}')
+   r[11] = wrk.format('POST','/q/1/2/', {"Content-Type", "application/json"}, '{"my":"json"}')
+   r[12] = wrk.format('POST','/q/1/2/', {"Content-Type", "application/json"}, '{"my":"json"}')
+   r[13] = wrk.format('POST','/q/1/2/', {"Content-Type", "application/json"}, '{"my":"json"}')
+   r[14] = wrk.format('POST','/q/1/2/', {"Content-Type", "application/json"}, '{"my":"json"}')
+   r[15] = wrk.format('POST','/q/1/2/', {"Content-Type", "application/json"}, '{"my":"json"}')
+   req = table.concat(r)
+end
+
+request = function()
+   return req
+end
diff --git a/tests/lua/q-mrp.lua b/tests/lua/q-mrp.lua
new file mode 100755
index 0000000..f60a979
--- /dev/null
+++ b/tests/lua/q-mrp.lua
@@ -0,0 +1,28 @@
+-- example script demonstrating HTTP pipelining
+
+init = function(args)
+   local r = {}
+   wrk.headers["Content-Type"] = "application/mrpacker"
+   wrk.headers["Cookie"] = "mrsession=43709dd361cc443e976b05714581a7fb"
+   table.insert(r, wrk.format('POST','/mrq/1', nil, string.char(0x44,0xc1,0xc2,0xc3,0xc4)))
+   table.insert(r, wrk.format('POST','/mrq/1', nil, string.char(0x44,0xc1,0xc2,0xc3,0xc4)))
+   table.insert(r, wrk.format('POST','/mrq/1', nil, string.char(0x44,0xc1,0xc2,0xc3,0xc4)))
+   table.insert(r, wrk.format('POST','/mrq/1', nil, string.char(0x44,0xc1,0xc2,0xc3,0xc4)))
+   table.insert(r, wrk.format('POST','/mrq/1', nil, string.char(0x44,0xc1,0xc2,0xc3,0xc4)))
+   table.insert(r, wrk.format('POST','/mrq/1', nil, string.char(0x44,0xc1,0xc2,0xc3,0xc4)))
+   table.insert(r, wrk.format('POST','/mrq/1', nil, string.char(0x44,0xc1,0xc2,0xc3,0xc4)))
+   table.insert(r, wrk.format('POST','/mrq/1', nil, string.char(0x44,0xc1,0xc2,0xc3,0xc4)))
+   table.insert(r, wrk.format('POST','/mrq/1', nil, string.char(0x44,0xc1,0xc2,0xc3,0xc4)))
+   table.insert(r, wrk.format('POST','/mrq/1', nil, string.char(0x44,0xc1,0xc2,0xc3,0xc4)))
+   table.insert(r, wrk.format('POST','/mrq/1', nil, string.char(0x44,0xc1,0xc2,0xc3,0xc4)))
+   table.insert(r, wrk.format('POST','/mrq/1', nil, string.char(0x44,0xc1,0xc2,0xc3,0xc4)))
+   table.insert(r, wrk.format('POST','/mrq/1', nil, string.char(0x44,0xc1,0xc2,0xc3,0xc4)))
+   table.insert(r, wrk.format('POST','/mrq/1', nil, string.char(0x44,0xc1,0xc2,0xc3,0xc4)))
+   table.insert(r, wrk.format('POST','/mrq/1', nil, string.char(0x44,0xc1,0xc2,0xc3,0xc4)))
+   req = table.concat(r)
+end
+
+request = function()
+   return req
+end
+
diff --git a/tests/lua/q-session.lua b/tests/lua/q-session.lua
new file mode 100755
index 0000000..2a6133f
--- /dev/null
+++ b/tests/lua/q-session.lua
@@ -0,0 +1,37 @@
+-- example script demonstrating HTTP pipelining
+
+init = function(args)
+   local r = {}
+   wrk.headers["Cookie"] = "mrsession=43709dd361cc443e976b05714581a7fb"
+   table.insert(r, wrk.format(nil, "/s"))
+   table.insert(r, wrk.format(nil, "/s"))
+   table.insert(r, wrk.format(nil, "/s"))
+   table.insert(r, wrk.format(nil, "/s"))
+   table.insert(r, wrk.format(nil, "/s"))
+   table.insert(r, wrk.format(nil, "/s"))
+   table.insert(r, wrk.format(nil, "/s"))
+   table.insert(r, wrk.format(nil, "/s"))
+   table.insert(r, wrk.format(nil, "/s"))
+   table.insert(r, wrk.format(nil, "/s"))
+   table.insert(r, wrk.format(nil, "/s"))
+   table.insert(r, wrk.format(nil, "/s"))
+   table.insert(r, wrk.format(nil, "/s"))
+   table.insert(r, wrk.format(nil, "/s"))
+   table.insert(r, wrk.format(nil, "/s"))
+   table.insert(r, wrk.format(nil, "/s"))
+   table.insert(r, wrk.format(nil, "/s"))
+   table.insert(r, wrk.format(nil, "/s"))
+   table.insert(r, wrk.format(nil, "/s"))
+   table.insert(r, wrk.format(nil, "/s"))
+   table.insert(r, wrk.format(nil, "/s"))
+   table.insert(r, wrk.format(nil, "/s"))
+   table.insert(r, wrk.format(nil, "/s"))
+   table.insert(r, wrk.format(nil, "/s"))
+   table.insert(r, wrk.format(nil, "/s"))
+   table.insert(r, wrk.format(nil, "/s"))
+   req = table.concat(r)
+end
+
+request = function()
+   return req
+end
diff --git a/tests/lua/q-upload.lua b/tests/lua/q-upload.lua
new file mode 100755
index 0000000..61a839c
--- /dev/null
+++ b/tests/lua/q-upload.lua
@@ -0,0 +1,49 @@
+-- File upload pipelined
+
+function read_txt_file(path)
+    local file, errorMessage = io.open(path, "r")
+    if not file then 
+        error("Could not read the file:" .. errorMessage .. "\n")
+    end
+
+    local content = file:read "*all"
+    file:close()
+    return content
+end
+
+init = function(args)
+  local r = {}
+  local Boundary = "----WebKitFormBoundaryePkpFF7tjBAqx29L"
+  local BodyBoundary = "--" .. Boundary
+  local LastBoundary = "--" .. Boundary .. "--"
+  
+  local CRLF = "\r\n"
+  
+  local FileBody = read_txt_file("tests/lua/test.txt")
+  
+  local Filename = "test.txt"
+  
+  local ContentDisposition = "Content-Disposition: form-data; name=\"file\"; filename=\"" .. Filename .. "\""
+  
+  wrk.method = "POST"
+  wrk.headers["Content-Type"] = "multipart/form-data; boundary=" .. Boundary
+  --wrk.headers["Cookie"] = "mrsession=43709dd361cc443e976b05714581a7fb"
+  local body = BodyBoundary .. CRLF .. ContentDisposition .. CRLF .. CRLF .. FileBody .. CRLF .. LastBoundary .. CRLF
+
+  table.insert(r, wrk.format(nil, "/", nil, body))
+  table.insert(r, wrk.format(nil, "/", nil, body))
+  table.insert(r, wrk.format(nil, "/", nil, body))
+  table.insert(r, wrk.format(nil, "/", nil, body))
+  table.insert(r, wrk.format(nil, "/", nil, body))
+  table.insert(r, wrk.format(nil, "/", nil, body))
+  table.insert(r, wrk.format(nil, "/", nil, body))
+  table.insert(r, wrk.format(nil, "/", nil, body))
+  table.insert(r, wrk.format(nil, "/", nil, body))
+  req = table.concat(r)
+end
+
+request = function()
+   return req
+end
+
+
diff --git a/tests/lua/test.txt b/tests/lua/test.txt
index 802f69c..572486c 100644
--- a/tests/lua/test.txt
+++ b/tests/lua/test.txt
@@ -1,3 +1,30 @@
 Line one
 Line two
 Line three
+Line threeaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+Line threeaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+Line three
+Line three
+Line threeaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+Line three
+Line three
+Line three
+Line three
+Line three
+Line three
+Line three
+Line three
+Line three
+Line three
+Line three
+Line three
+Line three
+Line three
+Line three
+Line threeaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+Line threeaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+Line three
+Line three
+Line three
+Line three
+Line three
diff --git a/tests/lua/upload.lua b/tests/lua/upload.lua
index 354ddfa..eff8f93 100644
--- a/tests/lua/upload.lua
+++ b/tests/lua/upload.lua
@@ -18,12 +18,6 @@ local CRLF = "\r\n"
 
 local FileBody = read_txt_file("tests/lua/test.txt")
 
--- We don't need different file names here because the test should
--- always replace the uploaded file with the new one. This will avoid
--- the problem with directories having too much files and slowing down
--- the application, which is not what we are trying to test here.
--- This will also avoid overloading wrk with more things do to, which
--- can influence the test results.
 local Filename = "test.txt"
 
 local ContentDisposition = "Content-Disposition: form-data; name=\"file\"; filename=\"" .. Filename .. "\""
diff --git a/tests/readme b/tests/readme
index 5ae9e36..271d3c3 100644
--- a/tests/readme
+++ b/tests/readme
@@ -1,6 +1,7 @@
 
 Start this up to run the tests -- have in automatically done?
-memcached -l 127.0.0.1 -p 11211 -d -m 50
+ mrcache -m 64 -i 16
+  python load_session.py will load the test user session
 
 set methods on pages
 test not returning a string from a page ( regular and coro )
diff --git a/tests/s1.py b/tests/s1.py
index d379a0c..268f9b3 100644
--- a/tests/s1.py
+++ b/tests/s1.py
@@ -102,7 +102,6 @@ def mrp(r):
 
 @app.route('/form')
 def parseForm(r):
-  print(r)
   if r.form == None: return "No form"
   return json.dumps(r.form)
 
@@ -114,7 +113,7 @@ def parseFiles(r):
 @app.route('/s',options=['session'])
 def session(r):
   if r.user:
-    return r.user["user"]
+    return "session"
   return "session"
 
 @app.route('/noreturn')
diff --git a/tests/s_bench.py b/tests/s_bench.py
index 1e3b969..63d1fbf 100755
--- a/tests/s_bench.py
+++ b/tests/s_bench.py
@@ -80,7 +80,7 @@ def testing(r):
 
 @app.route('/404/')
 def notFound(r):
-  return r.NotFound()
+  return app.err404
 
 @app.route('/500/')
 def error500(r):
@@ -110,7 +110,9 @@ def content(r):
 
 @app.route('/form')
 def parseForm(r):
-  return r.form["param2"]
+  if r.form:
+    return r.form["param2"]
+  return "No form"
 
 @app.route('/json')
 def parseJ(r):
@@ -168,6 +170,17 @@ def t2(r):
 def longresp(r):
   return "fart"*128*1000
 
+@app.route('/upload')
+def upload(r):
+  if r.file == None:
+    return "No file uploaded"
+  #for f in r.files:
+    #print(f)
+  name = r.file['name']
+  typ  = r.file['type']
+  body = r.file['body']
+  return name
 
-app.run(cores=1)
+
+app.run(cores=4)
 
diff --git a/tests/test_requests.py b/tests/test_requests.py
index 36d13a6..5eb80a9 100644
--- a/tests/test_requests.py
+++ b/tests/test_requests.py
@@ -43,12 +43,6 @@ def setup():
 
 
 def test_one():
-  data = {}
-  s = "lo(ng"*5000
-  data["long"] = s
-  r = requests.post('http://localhost:8080/form',data) # TODO have this timeout quickly
-  eq(r.status_code, 200)
-  eq(r.text, '{"long":"' + s + '"}')
 
   r = requests.get('http://localhost:8080/foo')
   eq(r.status_code, 200)
@@ -132,8 +126,8 @@ def test_one():
   eq(r.text, '{"":"v","pa{}ram2":"val(ue2"}')
   r = requests.post('http://localhost:8080/form', data={"":"v","英文版本":"val(ue2"})
   eq(r.text, '{"":"v","英文版本":"val(ue2"}')
-  r = requests.post('http://localhost:8080/form', data={"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa":"","英":"+=&ue2"})
-  eq(r.text, '{"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa":"","英":"+=&ue2"}')
+  r = requests.post('http://localhost:8080/form', data={"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa&":"","英":"+=&ue2"})
+  eq(r.text, '{"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa&":"","英":"+=&ue2"}')
   #data = {}
   #s = "lo(ng"*10000
   #data["long"] = s
@@ -142,6 +136,12 @@ def test_one():
   #eq(r.text, '{"long":"' + s + '"}')
   r = requests.get('http://localhost:8080/form')
   eq(r.text, "No form")
+  data = {}
+  s = "lo(ng"*5000
+  data["long"] = s
+  r = requests.post('http://localhost:8080/form',data) 
+  eq(r.status_code, 200)
+  eq(r.text, '{"long":"' + s + '"}')
 
   # Sessions
   cookie = {'mrsession': '43709dd361cc443e976b05714581a7fb'}
@@ -151,6 +151,15 @@ def test_one():
   # Misc
   r = requests.get('http://localhost:8080/printIP')
   eq(r.text, "None")
+  headers = {'CF-Connecting-IP': '123'}
+  r = requests.get('http://localhost:8080/printIP', headers=headers)
+  eq(r.text, "123")
+  headers = {'X-Real-IP': '1234'}
+  r = requests.get('http://localhost:8080/printIP', headers=headers)
+  eq(r.text, "1234")
+  headers = {'X-Forwarded-For': '12'}
+  r = requests.get('http://localhost:8080/printIP', headers=headers)
+  eq(r.text, "12")
 
   # TODO we can't test bad headers as requests won't send them curl localhost:8080/ -H "ƒtest:ƒart"
 
diff --git a/tests/tst.py b/tests/tst.py
index fd4d8a0..db17da6 100644
--- a/tests/tst.py
+++ b/tests/tst.py
@@ -5,55 +5,9 @@ from common import eq,contains,stop_server
 import mrpacker
 
 if 1:
+  r = requests.post('http://localhost:8765', data={"p1":"v1","param2":"value2"})
+  #r = requests.post('http://localhost:8765', files={"p1":"v1","param2":"value2"})
+  #r = requests.post('http://localhost:8765', data={"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa":"","英":"+=&ue2"})
 
-  headers = {'Content-Type': 'application/mrpacker'}
-  o = { "typ":"post", "s":2, "t": 'Blonde: "What does IDK stand for?"', "l":"localhost/sub/3", "txt": 'Brunette: "I don’t know."\nBlonde: "OMG, nobody does!"' }
-  r = requests.post('http://localhost:8080/mrp', data=mrpacker.pack(o), headers=headers)
-  if eq(r.text, 'post') != 0:
-    print( r.raw.headers )
-    print( "text is ", r.text )
+  #eq(r.text, '{"p1":"v1","param2":"value2"}')
 
-  cookie = {'foo': 'bar','baz':'3'}
-  r = requests.post('http://localhost:8080/printCookies', cookies=cookie)
-  eq(r.text, "{'baz': '3', 'foo': 'bar'}")
-
-  cookie = {'foo': 'b=ar'}
-  r = requests.post('http://localhost:8080/printCookies', cookies=cookie)
-  eq(r.text, "{'foo': 'b=ar'}")
-  
-
-if 0:
-  data = {}
-  s = "lo(ng"*10000
-  data["long"] = s
-  r = requests.post('http://localhost:8080/form',data)
-  eq(r.status_code, 200)
-
-if 0:
-  headers = {'Content-type': 'application/mrpacker'}
-  o = { "typ":"post", "s":4, "t": 'Blonde: "What does IDK stand for?"', "l":"", "txt": 'Brunette: "I don’t know."\nBlonde: "OMG, nobody does!"' }
-  o = { "typ": "post", "l": "", "t": "Blonde: \"What does IDK stand for?\"", "txt": "Brunette: \"I don’t know.\"\n\nBlonde: \"OMG, nobody does!\"", "s": 3}
-  print(len(o["txt"]))
-  b = mrpacker.pack(o)
-  s = ""
-  for c in b:
-    #s = s + str(hex(int(c))) + ", "
-    s = s + str(int(c)) + ", "
-  print(s)
-  print( mrpacker.unpack(b) )
-  #r = requests.post('http://localhost:8080/mrp', data=mrpacker.pack(o), headers=headers)
-  #eq(r.text, 'post')
-
-l = [66, 114, 117, 110, 101, 116, 116, 101, 58, 32, 34, 73, 32, 100, 111, 110, 226, 128, 153, 116, 32, 107, 110, 111, 119, 46, 34, 10, 10, 66, 108, 111, 110, 100, 101, 58, 32, 34, 79, 77, 71, 44, 32, 110, 111, 98, 111, 100, 121, 32, 100, 111, 101, 115, 33, 34]
-print(len(l))
-s = ""
-#for c in l:
-  #s += chr(c)
-#print(s)
-
-z = """
-"""
-54
-37, 131, 116, 121, 112, 132, 112, 111, 115, 116, 129, 108, 128, 129, 116, 102, 34, 0, 0, 0, 66, 108, 111, 110, 100, 101, 58, 32, 34, 87, 104, 97, 116, 32, 100, 111, 101, 115, 32, 73, 68, 75, 32, 115, 116, 97, 110, 100, 32, 102, 111, 114, 63, 34, 131, 116, 120, 116, 102, 56, 0, 0, 0, 66, 114, 117, 110, 101, 116, 116, 101, 58, 32, 34, 73, 32, 100, 111, 110, 226, 128, 153, 116, 32, 107, 110, 111, 119, 46, 34, 10, 10, 66, 108, 111, 110, 100, 101, 58, 32, 34, 79, 77, 71, 44, 32, 110, 111, 98, 111, 100, 121, 32, 100, 111, 101, 115, 33, 34, 129, 115, 195, 
-{'typ': 'post', 'l': '', 't': 'Blonde: "What does IDK stand for?"', 'txt': 'Brunette: "I don’t know."\n\nBlonde: "OMG, nobody does!"', 's': 3}
-56
diff --git a/workserver.py b/workserver.py
new file mode 100644
index 0000000..cb1fdf9
--- /dev/null
+++ b/workserver.py
@@ -0,0 +1,11 @@
+import asyncio
+import mrworkserver
+
+async def callback(ws, msgs):
+  for m in msgs:
+    pass
+
+ws = mrworkserver.WorkServer(callback=callback)
+
+ws.run(host="127.0.0.1",port=7100)
+