You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
247 lines
7.6 KiB
247 lines
7.6 KiB
1 year ago
|
module MultipartParser
  # A low level, incremental parser for multipart messages,
  # based on the node-formidable parser.
  #
  # Usage: create a parser, call #init_with_boundary, register callbacks
  # with #on and then feed the message to #write in one or more chunks.
  # Parser state is kept between calls to #write, so a message may be
  # split at arbitrary positions.
  class Parser

    def initialize
      @boundary = nil
      @boundary_chars = nil
      @lookbehind = nil
      @state = :parser_uninitialized
      @index = 0 # Index into boundary or header
      @flags = {}
      @marks = {} # Keep track of different parts
      @callbacks = {}
    end

    # Initializes the parser, using the given boundary
    # (the boundary string without the leading "--").
    # Registered callbacks are kept; all parsing state is reset, so the
    # parser can be reused for a new message.
    def init_with_boundary(boundary)
      @boundary = "\r\n--" + boundary
      @lookbehind = "\0" * (@boundary.length + 8)
      @state = :start

      # Forget anything left over from a previously parsed message.
      @index = 0
      @flags = {}
      @marks = {}

      # Set of characters occurring in the boundary, used by #write
      # to quickly skip over data that can not be part of a boundary.
      @boundary_chars = {}
      @boundary.each_byte do |b|
        @boundary_chars[b.chr] = true
      end
    end

    # Registers a callback to be called when the
    # given event occurs. Each callback is expected to
    # take three parameters: buffer, start_index, and end_index.
    # The data belonging to the event is buffer[start_index...end_index]
    # (end_index is exclusive). Note that the buffer is not always the
    # one passed to #write; it may be an internal buffer that is reused,
    # so copy the data if it must be kept.
    # All of these parameters may be nil, depending on the callback.
    # Registering a second callback for an event replaces the first.
    # Valid callbacks are:
    # :end
    # :header_field
    # :header_value
    # :header_end
    # :headers_end
    # :part_begin
    # :part_data
    # :part_end
    def on(event, &callback)
      @callbacks[event] = callback
    end

    # Writes data to the parser.
    # Returns the number of bytes parsed.
    # In practice, this means that if the return value
    # is less than the buffer length, a parse error occurred.
    # Writing to a parser that has not been initialized with
    # #init_with_boundary parses nothing and returns 0.
    def write(buffer)
      # Without a boundary there is nothing to parse against.
      return 0 if @state == :parser_uninitialized

      i = 0
      buffer_length = buffer.length
      index = @index
      flags = @flags.dup
      state = @state
      lookbehind = @lookbehind
      boundary = @boundary
      boundary_chars = @boundary_chars
      boundary_length = @boundary.length
      boundary_end = boundary_length - 1

      while i < buffer_length
        c = buffer[i, 1]
        case state
        when :start
          index = 0
          state = :start_boundary
        when :start_boundary # Differs in that it has no preceding \r\n
          if index == boundary_length - 2
            return i unless c == "\r"
            index += 1
          elsif index - 1 == boundary_length - 2
            return i unless c == "\n"
            # Boundary read successfully, begin next part
            callback(:part_begin)
            state = :header_field_start
          else
            return i unless c == boundary[index + 2, 1] # Unexpected character
            index += 1
          end
          i += 1
        when :header_field_start
          # Does not consume the character; it is handled as :header_field
          state = :header_field
          @marks[:header_field] = i
          index = 0
        when :header_field
          if c == "\r"
            # An empty line: the header section of this part is over
            @marks.delete :header_field
            state = :headers_almost_done
          else
            index += 1
            unless c == "-" # Skip hyphens
              if c == ":"
                return i if index == 1 # Empty header field
                data_callback(:header_field, buffer, i, :clear => true)
                state = :header_value_start
              else
                cl = c.downcase
                return i if cl < "a" || cl > "z"
              end
            end
          end
          i += 1
        when :header_value_start
          if c == " " # Skip spaces
            i += 1
          else
            @marks[:header_value] = i
            state = :header_value
          end
        when :header_value
          if c == "\r"
            data_callback(:header_value, buffer, i, :clear => true)
            callback(:header_end)
            state = :header_value_almost_done
          end
          i += 1
        when :header_value_almost_done
          return i unless c == "\n"
          state = :header_field_start
          i += 1
        when :headers_almost_done
          return i unless c == "\n"
          callback(:headers_end)
          state = :part_data_start
          i += 1
        when :part_data_start
          # Does not consume the character; it is handled as :part_data
          state = :part_data
          @marks[:part_data] = i
        when :part_data
          prev_index = index

          if index == 0
            # Boyer-Moore derived algorithm to safely skip non-boundary data
            # See http://debuggable.com/posts/parsing-file-uploads-at-500-
            # mb-s-with-node-js:4c03862e-351c-4faa-bb67-4365cbdd56cb
            while i + boundary_length <= buffer_length
              break if boundary_chars.key? buffer[i + boundary_end].chr
              i += boundary_length
            end
            c = buffer[i, 1]
          end

          if index < boundary_length
            if boundary[index, 1] == c
              if index == 0
                # A boundary may start here; emit the data seen so far
                data_callback(:part_data, buffer, i, :clear => true)
              end
              index += 1
            else # It was not the boundary we found, after all
              index = 0
            end
          elsif index == boundary_length
            index += 1
            # A flag may be left over from an earlier false alarm
            # (e.g. a boundary followed by "\r" but not "\n"). Drop it,
            # or it would hide the decision made for this boundary.
            flags.delete :part_boundary
            flags.delete :last_boundary
            if c == "\r"
              flags[:part_boundary] = true
            elsif c == "-"
              flags[:last_boundary] = true
            else # We did not find a boundary after all
              index = 0
            end
          elsif index - 1 == boundary_length
            if flags[:part_boundary]
              index = 0
              if c == "\n"
                flags.delete :part_boundary
                callback(:part_end)
                callback(:part_begin)
                state = :header_field_start
                i += 1
                next # Ugly way to break out of the case statement
              end
            elsif flags[:last_boundary]
              if c == "-"
                callback(:part_end)
                callback(:end)
                state = :end
              else
                index = 0 # False alarm
              end
            else
              index = 0
            end
          end

          if index > 0
            # When matching a possible boundary, keep a lookbehind
            # reference in case it turns out to be a false lead
            lookbehind[index - 1] = c
          elsif prev_index > 0
            # If our boundary turns out to be rubbish,
            # the captured lookbehind belongs to part_data
            callback(:part_data, lookbehind, 0, prev_index)
            @marks[:part_data] = i

            # Reconsider the current character as it might be the
            # beginning of a new sequence.
            i -= 1
          end

          i += 1
        when :end
          # Anything after the closing boundary is ignored
          i += 1
        else
          return i
        end
      end

      # Emit whatever data is still pending in this buffer. The marks are
      # reset to 0 so that the data continues at the start of the next buffer.
      data_callback(:header_field, buffer, buffer_length)
      data_callback(:header_value, buffer, buffer_length)
      data_callback(:part_data, buffer, buffer_length)

      @index = index
      @state = state
      @flags = flags

      return buffer_length
    end

    private

    # Issues a callback, if one is registered for the event.
    # Nothing is issued for an empty data range (start == the_end).
    def callback(event, buffer = nil, start = nil, the_end = nil)
      return if !start.nil? && start == the_end
      if @callbacks.key? event
        @callbacks[event].call(buffer, start, the_end)
      end
    end

    # Issues a data callback for the data between the mark of
    # the given data type and the_end (exclusive).
    # Does nothing if no mark is set for the data type.
    # The only valid option is :clear,
    # which, if true, will remove the mark (the data is complete).
    # If not specified, the mark will be reset to 0, so that
    # the data continues at the start of the next buffer.
    def data_callback(data_type, buffer, the_end, options = {})
      return unless @marks.key? data_type
      callback(data_type, buffer, @marks[data_type], the_end)
      if options[:clear]
        @marks.delete data_type
      else
        @marks[data_type] = 0
      end
    end
  end
end
|