require 'net/http'
# Fetches the article index page of one issue from www.linux-magazin.de and
# turns it into a list of [text, link] pairs.
class Article
  def initialize
    @host = 'www.linux-magazin.de'
    @baseurl = '/Artikel/ausgabe/'
  end

  # Cuts the raw page text into one fragment per article.
  #
  # NOTE(review): the three split patterns below look as if HTML tag names
  # were stripped from them at some point (they currently match a newline
  # and/or "]"* followed by ">"). They are kept exactly as found -- confirm
  # against the original source before relying on them. The continuation
  # lines of the multi-line literals must stay at column 0, otherwise the
  # leading whitespace would become part of the pattern.
  #
  # @param text [String] page text as returned by #load_article_page
  # @return [Array<String>, nil] the fragments after the first one, or nil
  #   when text is not a String or no fragments could be isolated
  def parse_articles(text)
    return nil unless text.is_a?(String)

    section = text.split(%r{
]*>}i)[1]
    return nil unless section

    # An empty section splits into [], whose first element is nil; bail out
    # here instead of calling split on nil.
    listing = section.split(%r{
}i)[0]
    return nil unless listing

    fragments = listing.split(%r{]*>}i)
    return nil unless fragments.length > 1

    # The first fragment precedes the first separator, so it is dropped.
    fragments[1..-1]
  end

  # Requests the issue's index_html page with print=y and returns its body.
  #
  # @param year [Integer] issue year
  # @param month [Integer] issue month (zero-padded to two digits in the URL)
  # @return [String, nil] response body
  def load_article_page(year, month)
    address = get_address(year, month, '/index_html?print=y')
    # Net::HTTP#get returns a single response object; the body has to be read
    # from it rather than destructured from the return value.
    createConnection.get(address).body
  end

  # Loads and parses the article list of one issue.
  #
  # @param year [Integer] issue year
  # @param month [Integer] issue month
  # @return [Array<Array>, String] one [text, link] pair per article (link is
  #   nil when the fragment has no href), or a message String
  #   ('Netzwerkfehler' / 'keine Artikel gefunden') on failure
  def read_articles(year, month)
    begin
      page = load_article_page(year, month)
    rescue SocketError
      return 'Netzwerkfehler'
    end

    fragments = parse_articles(page)
    return 'keine Artikel gefunden' unless fragments

    # The base address is the same for every fragment, so build it once.
    base = get_address(year, month)
    fragments.map do |fragment|
      [extract_article_text(fragment), extract_article_link(fragment, base)]
    end
  end

  private

  # Builds the HTTP client for @host.
  def createConnection
    Net::HTTP.new(@host)
  end

  # Builds "http://<host><baseurl><year>/<month, two digits><suffix>".
  # A nil suffix is rendered as an empty string by the %s directive.
  def get_address(year, month, suffix=nil)
    sprintf("http://%s%s%s/%02d%s", @host, @baseurl, year, month, suffix)
  end

  # Removes everything that looks like a tag and trims surrounding whitespace.
  def extract_article_text(text)
    text.gsub(/<[^>]+>/, '').strip
  end

  # Returns base + '/' + the first href value found in text, or nil when text
  # contains no href attribute.
  def extract_article_link(text, base)
    # [^"]* stops at the closing quote of the href value; a greedy .* would
    # run on to the last quote on the line when further attributes follow.
    match = %r{href="([^"]*)"}i.match(text)
    base + '/' + match[1] if match
  end
end
# Script entry point: print the articles of issue 2004/05, one "---"
# separated entry per article, followed by its link when one was found.
if __FILE__ == $0
  article = Article.new
  results = article.read_articles(2004, 5)

  # read_articles reports failures as a plain message String instead of an
  # Array; String has no #each, so report the message and stop rather than
  # trying to iterate over it.
  abort(results) if results.is_a?(String)

  results.each do |text, link|
    puts "---\n" + text
    puts link + "\n" if link
  end
end