Twitter import scripts

This commit is contained in:
Stefan Schlott 2013-08-23 17:48:35 +02:00
parent e01a1ca35b
commit eaa0b9d74c
4 changed files with 148 additions and 0 deletions

64
scripts/expandlinks.rb Normal file
View file

@ -0,0 +1,64 @@
require 'csv'
require 'net/http'
require 'net/https'
# Extracts the redirect target from an HTTP response.
#
# The Location header is preferred. When it is absent, the first
# <a href="..."> link found in the response body is used instead.
#
# @param resp [#[], #body] response object; must answer resp['location'] and resp.body
# @return [String, nil] the target exactly as the server supplied it (it may be
#   relative), or nil when neither the header nor the body provides one
def redirect_url(resp)
  location = resp['location']
  return location unless location.nil?

  # The capture stops at the closing quote so extra attributes after href are
  # not swallowed. to_s covers responses without a body; String#[] with a
  # regexp yields nil instead of raising when nothing matches.
  resp.body.to_s[/<a\s+href="([^"]+)"/i, 1]
end
# Follows HTTP redirects starting at +url+ and returns the final address.
#
# A GET request is issued for the URL. If the response is a redirect, the
# target reported by redirect_url is followed recursively, at most +maxdepth+
# hops in total.
#
# @param url [String] the address to resolve
# @param maxdepth [Integer] remaining number of requests that may be made
# @return [String] the last URL reached; +url+ itself when the request fails,
#   when the response is not a redirect, when no redirect target can be
#   determined, or when +maxdepth+ is exhausted
def follow_url(url, maxdepth=5)
  return url unless maxdepth > 0

  begin
    uri = URI.parse(url)
    resp = if uri.scheme == 'https'
      https = Net::HTTP.new(uri.host, uri.port)
      https.use_ssl = true
      # NOTE(review): certificate verification is disabled here, as in the
      # original code. Confirm this is intended before relying on the result
      # for anything security-sensitive.
      https.verify_mode = OpenSSL::SSL::VERIFY_NONE
      https.request(Net::HTTP::Get.new(uri.request_uri))
    else
      Net::HTTP.get_response(uri)
    end
  rescue
    puts "Network error getting #{url}"
    return url
  end
  return url unless resp.kind_of?(Net::HTTPRedirection)

  begin
    target = redirect_url(resp)
    # A redirect status without a usable target: there is nothing to follow.
    return url if target.nil? || target.empty?

    # Servers may send a relative target such as "/path"; resolve it against
    # the URL that was just requested. Absolute targets are passed on untouched.
    target = URI.join(url, target).to_s unless target =~ %r{\Ahttps?://}i
  rescue
    puts "Cannot determine redirect target for #{url}"
    return url
  end
  follow_url(target, maxdepth - 1)
end
# Command-line entry point.
#
# Usage: ruby expandlinks.rb INPUT.csv OUTPUT.csv
#
# Copies every row of INPUT.csv to OUTPUT.csv. In the fourth column (index 3)
# each http/https link is replaced by the address that follow_url resolves it
# to. All other columns are written unchanged.
input_path, output_path = ARGV
abort('Usage: expandlinks.rb INPUT.csv OUTPUT.csv') if input_path.nil? || output_path.nil?
abort('File not found') unless File.exist?(input_path)

# Resolve each distinct link only once per run. Falling back to the link itself
# keeps the text intact should follow_url ever yield nil.
resolved = Hash.new { |cache, link| cache[link] = follow_url(link) || link }

CSV.open(output_path, 'wb') do |out|
  CSV.foreach(input_path) do |row|
    tweet = row[3]
    unless tweet.nil?
      # Trailing punctuation is excluded so that "see http://x/y." does not
      # capture the final dot. The block form replaces every match with its
      # own resolution, so a link that is a prefix of another link is not
      # clobbered and backslashes in the result are not treated as
      # back-references.
      row[3] = tweet.gsub(%r{https?://[^ ]*[^ .);:!?]}) { |link| resolved[link] }
    end
    out << row
  end
end

29
scripts/update-twitter.sh Executable file
View file

@ -0,0 +1,29 @@
#!/bin/bash
# Maintains a CSV backup of a Twitter timeline in ./twitter.csv.
#
# Usage: update-twitter.sh [twitter-user]
#
# The account name is read from $TWITTER_USER; when that variable is unset or
# empty, the first argument is used instead.
#
# If twitter.csv already exists, the id in the first field of its first line
# is passed to "t timeline" via -s and the result is prepended to the file.
# Otherwise a full fetch is written to twitter.csv. In both cases the links
# are expanded with expandlinks.rb, which is expected next to this script.

if ! command -v t > /dev/null 2>&1 ; then
    echo "t not found. Please install from http://sferik.github.com/t/"
    exit 2
fi

# Fall back to the first argument only when the environment names no account.
if [ -z "$TWITTER_USER" ] ; then
    TWITTER_USER=$1
fi
if [ -z "$TWITTER_USER" ] ; then
    echo "Usage: $0 <twitter-user>  (or set TWITTER_USER)" >&2
    exit 2
fi

# Kept as an array so a script path containing spaces survives word splitting.
EXPANDLINKS=(ruby "$(dirname "$0")/expandlinks.rb")

if [ -f twitter.csv ] ; then
    # Update timeline backup
    LASTID=$(head -n 1 twitter.csv | cut -f 1 -d ",")
    if [ -z "$LASTID" ] ; then
        echo "Cannot read the last tweet id from twitter.csv; remove the file to do a full fetch." >&2
        exit 1
    fi
    # sed drops the first line of the CSV output before it is stored.
    t timeline "@${TWITTER_USER}" --number 3200 --csv -s "$LASTID" | sed '1d' > twitter-update.csv.unexpanded
    "${EXPANDLINKS[@]}" twitter-update.csv.unexpanded twitter-update.csv || exit 1
    rm twitter-update.csv.unexpanded
    mv twitter.csv twitter.csv.old
    cat twitter-update.csv twitter.csv.old > twitter.csv
    rm twitter.csv.old twitter-update.csv
else
    # Full fetch
    t timeline "@${TWITTER_USER}" --number 3200 --csv | sed '1d' > twitter.csv.unexpanded
    # Stop before deleting the raw download if link expansion failed.
    "${EXPANDLINKS[@]}" twitter.csv.unexpanded twitter.csv || exit 1
    rm twitter.csv.unexpanded
fi