Ruby: Stockmarket data retriever -> CSV

This software retrieves stock-market data from the web and saves it as CSV.
The data includes fundamentals such as P/E, P/BV, ROA, ROE,
OPBT, income and so on; it also retrieves the gains over the
last 5 and 10 days, 1, 3 and 6 months, and 1, 2, 3 and 5 years [if available].

After generating the CSV, check that the file looks right;
it may not if the layout of the web page has changed. I have used this
script only a couple of times.

Requires: rubygems and nokogiri

Source code

require 'rubygems'
require 'nokogiri'
require 'open-uri'
 
# Scrapes the profile page of a single company and keeps the figures found
# there so they can be written out as one CSV row.
#
# Every recognised figure is kept in an instance variable named after its
# key (for example @cur or @p_to_e) as a [value, caption] pair, where caption
# is the text found on the page. The same values are also collected in
# @param, keyed by the key string.
class Company
  # Matches a "HH:MM DD.MM.YY(YY)" stamp; group 1 is the time, group 2 the date.
  TIME_AND_DATE = /(\d{2}:\d{2}) (\d{2}\.\d{2}\.\d{2,4})/

  # A table is scanned only if its markup contains one of these strings.
  TABLE_MARKERS = ["Kapitalizacja", "Czas, data:", "5 sesji"].freeze

  # Page captions recognised by #add_param and the key each one is stored
  # under. The first matching pattern wins, so the order is significant
  # (e.g. /Zmiana.*\%/ must be tried before /Zmiana.*PLN/).
  PARAM_PATTERNS = [
    [/Otwarcie/, 'open'],
    [/EBIT..../, 'ebit'],
    [/P\/E/, 'p_to_e'],
    [/3 lata/, 'gain_3y'],
    [/Przychody/, 'income'],
    [/Kurs odniesienia/, 'ref'],
    [/Wolumen/, 'vol'],
    [/Kurs bie.*/, 'cur'],
    [/3 miesi.*/, 'gain_3m'],
    [/Liczba akcji w obrocie/, 'total_vol'],
    [/P\/BV/, 'p_to_bv'],
    [/ROA /, 'roa'],
    [/ROE /, 'roe'],
    [/Free Float/, 'free_float'],
    [/Zysk netto/, 'income_net'],
    [/Minimum.*PLN/, 'min'],
    [/10 sesji/, 'gain_10d'],
    [/1 rok/, 'gain_1y'],
    [/5 sesji/, 'gain_5d'],
    [/Kapitalizacja/, 'capitalization'],
    [/Obr.*PLN/, 'vol_kpln'],
    [/Maksimum.*PLN/, 'max'],
    [/Zmiana.*\%/, 'chng_percent'],
    [/6 miesi/, 'gain_6m'],
    [/Zmiana.*PLN/, 'chng_pln'],
    [/5 lat/, 'gain_5y'],
    [/1 miesi.*/, 'gain_1m'],
    [/2 lata/, 'gain_2y'],
    [/maks/, 'gain_max']
  ].freeze

  # Column order of the CSV row produced by #output_print.
  OUTPUT_ORDER = %w[
    name cur chng_pln chng_percent open min max vol_kpln vol
    free_float roa p_to_e gain_5d gain_10d gain_1m gain_3m
    gain_6m gain_1y gain_2y gain_3y gain_5y gain_max
    total_vol capitalization income_net income roe ref p_to_bv
    ebit
  ].freeze

  # name - company name, used as the first CSV column
  # url  - address of the company's profile page
  def initialize(name, url)
    @name = [name, "Nazwa firmy"]
    @url = url
    @param = {}
  end

  # Normalises a raw cell text.
  #
  # Returns [time, date] when the text is a "HH:MM DD.MM.YYYY" stamp, an empty
  # string when it contains the "---" placeholder, and otherwise the text with
  # commas turned into dots and spaces removed.
  def convert(value)
    value = value.to_s
    stamp = TIME_AND_DATE.match(value)
    return [stamp[1], stamp[2]] if stamp
    return "" if value =~ /---/

    value.gsub(/\,/, ".").gsub(/ /, "")
  end

  # Downloads the profile page and feeds every caption/value pair found in the
  # relevant tables to #add_param.
  def retrieve
    # URI.open is used because Kernel#open stopped fetching URLs in Ruby 3.0;
    # the block form closes the handle even when parsing raises.
    @doc = URI.open(@url) { |page| Nokogiri::HTML(page) }

    tables = @doc.xpath("//table").select do |table|
      markup = table.to_s
      TABLE_MARKERS.any? { |marker| markup.include?(marker) }
    end

    tables.each do |table|
      table.children.each do |row|
        row.children.each do |cell|
          next unless cell['class'] == "name"

          # The value sits two siblings after the caption cell; skip captions
          # that have no such node instead of failing on nil.
          value_cell = cell.next_sibling && cell.next_sibling.next_sibling
          next unless value_cell

          add_param(cell.text.strip, value_cell.text.strip)
        end
      end
    end
  end

  # Stores one caption/value pair.
  #
  # item  - caption text as found on the page
  # value - raw value text; it is passed through #convert before being stored
  #
  # The time/date caption fills @time and @date. Any other caption is looked
  # up in PARAM_PATTERNS; captions that match nothing are ignored.
  def add_param(item, value)
    if item =~ /Czas, data/
      # convert returns a plain string when the text is not a proper stamp,
      # hence the Array() wrapper.
      time, date = Array(convert(value))
      @time = [time.to_s, 'Czas']
      @date = [date.to_s, 'Data']
      return
    end

    value = convert(value)
    _pattern, key = PARAM_PATTERNS.find { |pattern, _key| item =~ pattern }
    return unless key

    instance_variable_set("@#{key}", [value, item])
    @param[key] = value
  end

  # Returns the company name and the profile URL as a two-element array.
  def get_name_url
    [@name[0], @url]
  end

  # Prints every stored parameter as "key=value", one per line.
  def list_params
    @param.each { |key, value| puts "#{key}=#{value}" }
  end

  # Builds one CSV line in OUTPUT_ORDER, terminated by a newline. Every field,
  # including the last one, is followed by a comma.
  #
  # first_line - pass 1 to get the header line (the captions, with commas
  #              replaced by dots and colons removed) instead of the values
  #
  # Figures that were never stored yield an empty field.
  def output_print(first_line = 0)
    header = (first_line == 1)
    fields = OUTPUT_ORDER.map do |key|
      value, caption = instance_variable_get("@#{key}")
      header ? caption.to_s.gsub(/,/, ".").gsub(/:/, "") : value.to_s
    end
    fields.map { |field| field + "," }.join + "\n"
  end
end
 
# Builds the list of companies linked from an index page and drives the
# retrieval and CSV output for all of them.
class All_companies
  # Downloads main_url and creates one Company for every link whose href
  # contains "profile-spolka".
  def initialize(main_url)
    # URI.open is used because Kernel#open stopped fetching URLs in Ruby 3.0;
    # the block form closes the handle even when parsing raises.
    @doc = URI.open(main_url) { |page| Nokogiri::HTML(page) }
    @companies = []

    @doc.xpath("//a").each do |link|
      href = link['href'].to_s
      next unless href.include?("profile-spolka")

      @companies << Company.new(link.text, "http://www.gielda.onet.pl/" + href)
    end
  end

  # Prints the number of companies followed by one "name:url" line for each.
  def list_all
    puts @companies.count
    @companies.each { |company| announce(company) }
  end

  # Retrieves the data of every company, printing "name:url" before each
  # download.
  def retrieve_all_data
    @companies.each do |company|
      announce(company)
      company.retrieve
    end
  end

  # Returns the CSV text: one line per company, in list order.
  #
  # include_header - when true, the header line produced by the first
  #                  company's output_print(1) is put in front of the rows.
  #                  Defaults to false, which yields the rows only.
  #
  # An empty company list yields an empty string either way.
  def output_all_data(include_header = false)
    rows = @companies.map { |company| company.output_print }
    rows.unshift(@companies.first.output_print(1)) if include_header && !@companies.empty?
    rows.join
  end

  private

  # Prints the "name:url" line for one company.
  def announce(company)
    name, url = company.get_name_url
    puts name + ":" + url
  end
end
# Script entry point: build the company list from the quotations page and
# download the data of every company on it.
market = All_companies.new("http://gielda.onet.pl/a,p,notowania.html")
market.retrieve_all_data

# Write the collected rows to the file "generated" in the working directory.
File.open("generated", 'w') do |out|
  out.write(market.output_all_data)
end