diff --git a/Gemfile b/Gemfile index e238c941..5690efad 100644 --- a/Gemfile +++ b/Gemfile @@ -7,4 +7,5 @@ gem 'systemu' gem 'adsf' gem 'sass' gem 'rdiscount' +gem 't' diff --git a/Gemfile.lock b/Gemfile.lock index a0d2e0a5..8958abe3 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -2,14 +2,35 @@ GEM remote: http://gems.github.com/ remote: http://rubygems.org/ specs: + addressable (2.3.3) adsf (1.0.1) rack (>= 1.0.0) coderay (1.0.9) colored (1.2) + cookiejar (0.3.0) cri (2.3.0) colored (>= 1.2) + daemons (1.1.9) + em-http-request (1.0.3) + addressable (>= 2.2.3) + cookiejar + em-socksify + eventmachine (>= 1.0.0.beta.4) + http_parser.rb (>= 0.5.3) + em-socksify (0.2.1) + eventmachine (>= 1.0.0.beta.4) + em-twitter (0.2.1) + eventmachine (~> 1.0) + http_parser.rb (~> 0.5) + simple_oauth (~> 0.1) + eventmachine (1.0.0) + faraday (0.8.5) + multipart-post (~> 1.1) + fastercsv (1.5.5) ffi (1.9.0) formatador (0.2.4) + geokit (1.6.5) + multi_json guard (1.8.2) formatador (>= 0.2.4) listen (>= 1.0.0) @@ -19,16 +40,24 @@ GEM guard-nanoc (1.0.1) guard (>= 1.8.0) nanoc (>= 3.6.3) + htmlentities (4.3.1) + http_parser.rb (0.5.3) + launchy (2.2.0) + addressable (~> 2.3) listen (1.2.3) rb-fsevent (>= 0.9.3) rb-inotify (>= 0.9) rb-kqueue (>= 0.2) lumberjack (1.0.4) method_source (0.8.2) + multi_json (1.6.1) + multipart-post (1.1.5) nanoc (3.6.4) cri (~> 2.3) nanoc3 (3.3.0) nanoc (>= 3.3.0) + oauth (0.4.7) + oj (2.0.7) pry (0.9.12.2) coderay (~> 1.0.5) method_source (~> 0.8) @@ -40,10 +69,34 @@ GEM rb-kqueue (0.2.0) ffi (>= 0.5.0) rdiscount (1.6.8) + retryable (1.3.2) sass (3.2.10) + simple_oauth (0.2.0) slop (3.4.6) systemu (2.5.2) + t (1.7.1) + fastercsv (~> 1.5) + geokit (~> 1.6) + htmlentities (~> 4.3) + launchy (~> 2.0) + oauth (~> 0.4) + oj (~> 2.0) + retryable (~> 1.2) + thor (>= 0.16, < 2) + tweetstream (~> 2.3) + twitter (~> 4.4) thor (0.18.1) + tweetstream (2.4.0) + daemons (~> 1.1) + em-http-request (~> 1.0.2) + em-twitter (~> 0.2) + twitter (~> 4.0) + yajl-ruby (~> 1.1) 
+    twitter (4.5.0)
+      faraday (~> 0.8, < 0.10)
+      multi_json (~> 1.0)
+      simple_oauth (~> 0.2)
+    yajl-ruby (1.1.0)
 
 PLATFORMS
   ruby
@@ -55,3 +108,4 @@ DEPENDENCIES
   rdiscount
   sass
   systemu
+  t
diff --git a/scripts/expandlinks.rb b/scripts/expandlinks.rb
new file mode 100644
index 00000000..ec58d2d9
--- /dev/null
+++ b/scripts/expandlinks.rb
@@ -0,0 +1,91 @@
+# Expands shortened links in the tweet column of a CSV timeline export.
+#
+# Usage: ruby expandlinks.rb INPUT.csv OUTPUT.csv
+#
+# Each row of INPUT.csv is copied to OUTPUT.csv with every http(s) URL in its
+# fourth column replaced by the URL it finally redirects to.
+
+require 'csv'
+require 'net/http'
+require 'net/https'
+require 'uri'
+
+# Zero-based index of the CSV column holding the tweet text
+# (columns appear to be id, time, nick, text -- TODO confirm).
+TWEET_COLUMN = 3
+
+# An http(s) URL: runs to the next space and never ends in punctuation.
+LINK_PATTERN = %r{https?://[^ ]*[^ .);:!?]}
+
+# Returns the redirect target announced by +resp+, or nil if there is none.
+#
+# The Location header is preferred; without one, the href of the first anchor
+# in the response body is used instead.
+def redirect_url(resp)
+  location = resp['location']
+  return location unless location.nil?
+
+  anchor = resp.body.to_s.match(/<a\s[^>]*href="([^"]+)"/i)
+  anchor && anchor[1]
+end
+
+# Follows up to +maxdepth+ redirects starting at +url+.
+#
+# Returns the last URL reached. +url+ itself is returned when the request
+# fails, the response is not a redirect, no redirect target can be determined,
+# or +maxdepth+ is exhausted.
+def follow_url(url, maxdepth=5)
+  return url if maxdepth <= 0
+
+  begin
+    uri = URI.parse(url)
+    resp = if uri.scheme == 'https'
+      https = Net::HTTP.new(uri.host, uri.port)
+      https.use_ssl = true
+      # NOTE(review): certificate verification is disabled, so redirect
+      # targets obtained over HTTPS are not authenticated.
+      https.verify_mode = OpenSSL::SSL::VERIFY_NONE
+      https.request(Net::HTTP::Get.new(uri.request_uri))
+    else
+      Net::HTTP.get_response(uri)
+    end
+  rescue StandardError
+    puts "Network error getting #{url}"
+    return url
+  end
+  return url unless resp.kind_of?(Net::HTTPRedirection)
+
+  target = redirect_url(resp)
+  return url if target.nil?
+
+  begin
+    # Location may be relative, so resolve it against the current URL.
+    next_url = URI.join(url, target).to_s
+  rescue URI::Error
+    return url
+  end
+  follow_url(next_url, maxdepth - 1)
+end
+
+# Returns a copy of +text+ with every link replaced by its expansion.
+#
+# +resolved+ is a Hash used to remember expansions, so that a link occurring
+# several times is only requested once.
+def expand_links(text, resolved)
+  text.gsub(LINK_PATTERN) { |link| resolved[link] ||= follow_url(link) }
+end
+
+if __FILE__ == $0
+  abort('Usage: ruby expandlinks.rb INPUT.csv OUTPUT.csv') if ARGV.length < 2
+  abort('File not found') unless File.exist?(ARGV[0])
+
+  resolved = {}
+  CSV.open(ARGV[1], 'wb') do |out|
+    CSV.foreach(ARGV[0]) do |row|
+      tweet = row[TWEET_COLUMN]
+      # Rows without a tweet column are copied through untouched.
+      row[TWEET_COLUMN] = expand_links(tweet, resolved) unless tweet.nil?
+      out << row
+    end
+  end
+end
diff --git a/scripts/update-twitter.sh b/scripts/update-twitter.sh
new file mode 100755
index 00000000..bc401215
--- /dev/null
+++ b/scripts/update-twitter.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+#
+# Back up a Twitter timeline to twitter.csv in the current directory using
+# the `t` command-line client, expanding links with expandlinks.rb.
+#
+# Usage: update-twitter.sh [TWITTER_USER]
+#
+# The user comes from the first argument when one is given, otherwise from
+# the TWITTER_USER environment variable.
+
+if ! command -v t > /dev/null 2>&1 ; then
+    echo "t not found. Please install from http://sferik.github.com/t/"
+    exit 2
+fi
+
+if [ -n "$1" ] ; then
+    TWITTER_USER=$1
+fi
+
+if [ -z "$TWITTER_USER" ] ; then
+    echo "Usage: $0 [TWITTER_USER] (or set TWITTER_USER in the environment)" >&2
+    exit 2
+fi
+
+SCRIPT_DIR=$(dirname "$0")
+
+# Run expandlinks.rb (expected next to this script): expandlinks INPUT OUTPUT
+expandlinks() {
+    ruby "$SCRIPT_DIR/expandlinks.rb" "$1" "$2"
+}
+
+if [ -s twitter.csv ] ; then
+    # Update timeline backup: LASTID is the first field of the first stored
+    # row; the newly fetched rows are put in front of the existing backup.
+    LASTID=$(head -n 1 twitter.csv | cut -f 1 -d ",")
+    t timeline "@${TWITTER_USER}" --number 3200 --csv -s "$LASTID" | sed '1d' > twitter-update.csv.unexpanded
+    expandlinks twitter-update.csv.unexpanded twitter-update.csv || exit 1
+    rm twitter-update.csv.unexpanded
+    mv twitter.csv twitter.csv.old
+    cat twitter-update.csv twitter.csv.old > twitter.csv
+    rm twitter.csv.old twitter-update.csv
+else
+    # Full fetch (also taken when twitter.csv exists but is empty)
+    t timeline "@${TWITTER_USER}" --number 3200 --csv | sed '1d' > twitter.csv.unexpanded
+    expandlinks twitter.csv.unexpanded twitter.csv || exit 1
+    rm twitter.csv.unexpanded
+fi