String#scan, #gsub, and #sub to provide the MatchData to attached code blocks.
String#scan, #gsub, and #sub yield the string value of the matched regular expression to a provided block, which is of very limited value. Currently, we must rely upon either ugly numeric match variables ( $1 - $9, etc.) or a class method ( Regexp.last_match ) to get at the details of the match:
str = '<span id="1"> <span> ...</span> </span> '
re = /(<(\/?)span> )/i
str.scan(re) # => [["<span> ", ""], ["</span> ", "/"], ["</span> ", "/"]]
matches = []
str.scan(re) do
  matches << Regexp.last_match
end
matches.each do |match|
  match.captures.each_with_index do |capture, ii|
    soff, eoff = match.offset(ii + 1)
    puts %Q("#{capture}" #{soff} .. #{eoff})
  end
end
String#scan, #sub, and #gsub should yield MatchData objects instead of Strings. I think that this could be achieved while breaking the least amount of code by adding a #to_str implementation to MatchData.
--- re.c.old 2004-08-22 00:24:09 Eastern Daylight Time
+++ re.c 2004-08-22 00:18:50 Eastern Daylight Time
@@ -2320,6 +2320,7 @@
rb_define_method(rb_cMatch, "pre_match", rb_reg_match_pre, 0);
rb_define_method(rb_cMatch, "post_match", rb_reg_match_post, 0);
rb_define_method(rb_cMatch, "to_s", match_to_s, 0);
+ rb_define_method(rb_cMatch, "to_str", match_to_s, 0);
rb_define_method(rb_cMatch, "inspect", rb_any_to_s, 0); /* in object.c */
rb_define_method(rb_cMatch, "string", match_string, 0);
}
--- string.c.old 2004-08-22 00:24:10 Eastern Daylight Time
+++ string.c 2004-08-22 00:20:35 Eastern Daylight Time
@@ -1928,7 +1928,7 @@
if (iter) {
rb_match_busy(match);
- repl = rb_obj_as_string(rb_yield(rb_reg_nth_match(0, match)));
+ repl = rb_obj_as_string(rb_yield(match));
rb_backref_set(match);
}
else {
@@ -2043,7 +2043,7 @@
regs = RMATCH(match)-> regs;
if (iter) {
rb_match_busy(match);
- val = rb_obj_as_string(rb_yield(rb_reg_nth_match(0, match)));
+ val = rb_obj_as_string(rb_yield(match));
rb_backref_set(match);
}
else {
@@ -4164,15 +4164,7 @@
else {
*start = END(0);
}
- if (regs-> num_regs == 1) {
- return rb_reg_nth_match(0, match);
- }
- result = rb_ary_new2(regs-> num_regs);
- for (i=1; i < regs-> num_regs; i++) {
- rb_ary_push(result, rb_reg_nth_match(i, match));
- }
-
- return result;
+ return match;
}
return Qnil;
}
I'm not 100% sure that this is right, and I haven't tested it. The equivalent Ruby code would be (note: this code appears to work, but it does cause problems with irb):
# Re-opens MatchData so that a match can be used wherever Ruby performs an
# implicit String conversion (which is done through #to_str).
class MatchData
  # Implicit String conversion for a match.
  #
  # @return [String] exactly what #to_s returns for this match
  def to_str
    to_s
  end
end
# Re-opens String so that the block forms of #scan, #gsub, #gsub!, #sub and
# #sub! yield the MatchData of each match (taken from Regexp.last_match)
# instead of the matched String. The non-block forms are passed straight
# through to the core implementations.
#
# NOTE(review): the core methods are now invoked from inside these wrappers,
# so the match globals ($~, $1, ...) are presumably updated in the wrapper's
# frame rather than in the caller's -- confirm before relying on them after a
# call to one of these methods.
class String
  # Keep the core implementations reachable under old_* names. The guards
  # make loading this file twice harmless: without them the second load would
  # alias the wrappers below onto old_* and every call would recurse forever.
  alias_method :old_scan, :scan unless method_defined?(:old_scan)
  alias_method :old_gsub!, :gsub! unless method_defined?(:old_gsub!)
  alias_method :old_sub!, :sub! unless method_defined?(:old_sub!)

  # Scans the receiver for +pattern+.
  #
  # With a block, yields the MatchData of every match and returns whatever
  # the core #scan returns for its block form. Without a block, behaves
  # exactly like the core #scan.
  def scan(pattern)
    return old_scan(pattern) unless block_given?

    old_scan(pattern) { yield Regexp.last_match }
  end

  # Non-destructive global substitution; works on a copy of the receiver.
  #
  # With a block (and no +repl+), the block receives the MatchData of each
  # match and its result is used as the replacement. With neither +repl+ nor
  # a block, an Enumerator is returned, as the core #gsub does, instead of
  # silently returning an unchanged copy.
  def gsub(pattern, repl = nil, &block)
    return to_enum(:gsub, pattern) if repl.nil? && block.nil?

    copy = dup
    # gsub! returns nil when nothing matched, so return the copy itself.
    copy.gsub!(pattern, repl, &block)
    copy
  end

  # Destructive global substitution.
  #
  # A non-nil +repl+ always wins over a block. Otherwise, if a block is
  # given, it receives the MatchData of each match. The return value is that
  # of the core #gsub!.
  def gsub!(pattern, repl = nil)
    if repl.nil? && block_given?
      old_gsub!(pattern) { yield Regexp.last_match }
    elsif repl.nil?
      old_gsub!(pattern)
    else
      old_gsub!(pattern, repl)
    end
  end

  # Non-destructive single substitution; works on a copy of the receiver.
  #
  # With a block (and no +repl+), the block receives the MatchData of the
  # first match and its result is used as the replacement.
  def sub(pattern, repl = nil, &block)
    copy = dup
    # sub! returns nil when nothing matched, so return the copy itself.
    copy.sub!(pattern, repl, &block)
    copy
  end

  # Destructive single substitution.
  #
  # A non-nil +repl+ always wins over a block. Otherwise, if a block is
  # given, it receives the MatchData of the first match. The return value is
  # that of the core #sub!.
  def sub!(pattern, repl = nil)
    if repl.nil? && block_given?
      old_sub!(pattern) { yield Regexp.last_match }
    elsif repl.nil?
      old_sub!(pattern)
    else
      old_sub!(pattern, repl)
    end
  end
end
Back to RCRchive.
RCR Submission page and RCRchive powered by Ruby, Apache, RuWiki (modified), and RubLog