2014-01-12

macからlinux(Samba)へのrsyncで日本語、全角スペースが含まれているファイルがあるときにうまくいかない場合の対処

mac linux

どうも全部のファイルが同期されるわけではない感じなので困った。
まず、Mavericksに入っているrsyncは2.6.9と古いのでbrewで最新を入れる

brew install rsync

自分の環境では/usr/local/binにリンク張れなかったので適宜chmodしてbrew link rsyncすること
brew経由のrsyncを使い、コマンドに--iconvオプションを付ける。

/usr/local/bin/rsync -auvhz --progress --iconv=UTF-8-MAC,UTF-8 -e ssh /Volumes/1TB/book/ hoge@xxx.xxx.xxx.xxx:/var/smb/sdb1/book/

という感じで

2014-01-12

eventmachineをつかってクローラーを作る

ruby

https://github.com/modeverv/animation-crawler

eventmachineでクローラーを作ってみました。
一定のアクセス順にクロールしていきますので単純なクローラーではないですが
結構綺麗に作れたのではないかと思います
eventmachineは速いですね
- URIライブラリをモンキーパッチしてます
- eventmachineのサンプルになっていると思います
- 最終ダウンロードはcurlを使いました
- cloneしてダウンロードパスをいじってcron登録すればよいかと思います

require 'pp'
require './uri.rb'
# uriモジュールは|を許容するように変更している
require 'eventmachine'
require 'em-http'
require 'nokogiri'
require 'open-uri'
require 'httpclient'
require 'mechanize'
require 'net/http'

# Event-driven crawler built on EventMachine.
#
# Work is modelled as jobs of the form { kind: JOB_*, value: ... } kept in
# @queue. A periodic timer pops jobs and dispatches them through #process;
# each handler may push follow-up jobs. @fetching counts jobs and requests
# that are still in flight, and the reactor is stopped once the crawler has
# been observed idle for @gaman timer ticks.
#
# NOTE(review): several handlers perform blocking I/O (open-uri, HTTPClient,
# Net::HTTP, ffmpeg) inside reactor callbacks, exactly as the original did.
class Crawler
  DOWNLOADDIR = "/var/smb/sdd1/video"

  START_URL = 'http://youtubeanisoku1.blog106.fc2.com/'
  AGENT  = Mechanize.new

  CONCURRENCY = 128
  WATCH_INTERVAL = 1
  MEGA = (1024 * 1024).to_f

  # Upper bound on the redirects #checkvideourl follows before giving up.
  MAX_REDIRECTS = 10

  JOB_ANISOKUTOP     = 'アニ速TOP'
  JOB_KOUSINPAGE     = '更新ページ'
  JOB_KOBETUPAGE     = '個別ページ'
  JOB_NOSUBSEARCH    = "nosub検索ページ"
  JOB_NOSUBVIDEO     = "nosubビデオページ"
  JOB_CONVERT        = "flv2mp4"

  # Builds a crawler whose queue is seeded with the START_URL job.
  #
  # hash[:ffmpeg]  - queue a conversion job after each reactor download
  # hash[:debug]   - print verbose progress output
  # hash[:usecurl] - hand the final download to a background curl process
  def initialize(hash = {})
    @queue = []
    @queue.push({ kind: JOB_ANISOKUTOP, value: START_URL })
    @fetching = 0
    @downloads = {}
    @ffmpeg = hash[:ffmpeg] || false
    @debug  = hash[:debug] || false
    @usecurl= hash[:usecurl]|| false
    @gaman  = 20
  end

  # Runs the reactor until the crawler has been idle for @gaman timer ticks.
  def run
    EM.run do
      EM.add_periodic_timer(WATCH_INTERVAL) do
        # Start as many queued jobs as the concurrency budget allows.
        (CONCURRENCY - @fetching).times do
          job = @queue.pop
          break unless job

          process job
        end

        if @fetching == 0
          @gaman -= 1
          puts "fetching:#{@fetching} gaman:#{@gaman}"
          if @gaman <= 0
            puts "finish"
            pp @downloads
            EM::stop_event_loop
          end
        else
          puts "fetching:#{@fetching}"
          pp @downloads
        end
      end
    end
  end

  # Dispatches one job to its handler. Every handler is responsible for
  # decrementing @fetching exactly once when the job is finished.
  def process(job)
    @fetching += 1
    case job[:kind]
    when JOB_ANISOKUTOP
      anisokutop job[:value]
    when JOB_KOUSINPAGE
      anisokukousin job[:value]
    when JOB_KOBETUPAGE
      anisokukobetu job[:value]
    when JOB_NOSUBSEARCH
      nosubsearch job[:value]
    when JOB_NOSUBVIDEO
      nosubvideo job[:value]
    when JOB_CONVERT
      convert job[:value]
    else
      # Without this branch an unknown kind would leak the counter and the
      # crawler would never be considered idle.
      puts "unknown job kind: #{job[:kind].inspect}"
      @fetching -= 1
    end
  end

  # Top page: queues every link whose title matches the update-status marker.
  def anisokutop(url)
    fetch_page(url) do |page|
      page.css(".Top_info div ul li a").each do |a|
        title = attr_value(a, 'title')
        href  = attr_value(a, 'href')
        next unless title && href
        next unless title =~ /更新状況/

        @queue.push({kind: JOB_KOUSINPAGE, value: href })
      end
    end
  end

  # Update page: queues each blog-entry link together with its cleaned title.
  def anisokukousin(url)
    fetch_page(url) do |page|
      page.css(".article ul li a").each do |a|
        href = attr_value(a, "href") || ""
        next unless href =~ /^http\:\/\/youtubeanisoku1\.blog106\.fc2\.com\/blog-entry-....\.html$/

        title = attr_value(a, "title")
        next unless title

        puts title + "-" + href if @debug
        # The title becomes a directory name, so drop spaces, slashes, hyphens.
        title = title.gsub(/[ \/\-]/, "")
        @queue.push({kind: JOB_KOBETUPAGE, value: {title: title, href: href } })
      end
    end
  end

  # Entry page: queues every nosub.tv search link found on the page.
  #
  # value - { title:, href: } as pushed by #anisokukousin
  def anisokukobetu(value)
    fetch_page(value[:href]) do |page|
      page.css("a").each do |a|
        href = attr_value(a, "href") || ""
        next unless href =~ /^http:\/\/www\.nosub\.tv\/\?s=/

        puts value[:title] + "-" + href if @debug
        @queue.push({kind: JOB_NOSUBSEARCH, value: {title: value[:title], href: href } })
      end
    end
  end

  # Search result page: collects the episode links and queues them as a
  # single JOB_NOSUBVIDEO job.
  #
  # value - { title:, href: } as pushed by #anisokukobetu
  def nosubsearch(value)
    fetch_page(value[:href]) do |page|
      urls = []
      page.css(".title a[rel='bookmark']").each do |a|
        href = attr_value(a, "href")
        raw  = attr_value(a, "title")
        # Anchors without both attributes cannot be fetched or named.
        next unless href && raw

        # The episode becomes a file name; strip characters unwanted there.
        episode = raw.gsub(/[ \/　\#\-]/, "")
        puts value[:title] + "-" + episode + "-" + href if @debug
        next if episode =~ /アニメPV集/

        urls << {title: value[:title] ,episode: episode, href: href }
      end
      @queue.push({kind: JOB_NOSUBVIDEO, value: urls })
    end
  end

  # Video pages: for each episode fetches its page and tries to download the
  # first playable source. Processing stops at the first episode whose file
  # already exists on disk (same as the original early return).
  #
  # value - array of { title:, episode:, href: } hashes
  def nosubvideo(value)
    value.each do |val|
      path = mkfilepath val[:title], val[:episode]
      break if File.exist?(path) || File.exist?(path + ".mp4")

      # Each page request is tracked separately from the job itself.
      @fetching += 1
      fetch_page(val[:href]) { |page| download_from_page page, path }
    end
    @fetching -= 1
  end

  # Issues a HEAD request for url, following redirects.
  #
  # limit - how many more redirects may be followed
  #
  # Returns the Content-Length as an Integer when it is larger than 1000,
  # otherwise false (also false on any error or when limit is exhausted).
  def checkvideourl(url, limit = MAX_REDIRECTS)
    check = false
    puts "checkvideo url: #{url}"  if @debug
    begin
      raise "too many redirects" if limit <= 0

      uri  = URI.parse(url)
      http = Net::HTTP.new(uri.host, uri.port)
      # Honour the scheme; the fc2 resolver URLs are https.
      http.use_ssl = (uri.scheme == "https")
      res = http.request_head(uri.request_uri)
      if res['location']
        # Location may be relative, so resolve it against the current URL.
        return checkvideourl(URI.join(url, res['location']).to_s, limit - 1)
      end

      length = res['content-length'].to_i
      check = length if length > 1000
    rescue => ex
      puts ex.inspect + " url:#{url}"
      check = false
    end
    puts "checkvideo url: #{url} check: #{check.to_s}"  if @debug
    check
  end

  # Downloads url to path unless the file (or its .mp4 conversion) exists.
  #
  # size - expected byte size, used only for progress reporting
  #
  # Returns nil when nothing had to be done or curl was launched, false when
  # the reactor download was started or an error occurred.
  def downloadvideo(url, path, size)
    return if File.exist?(path) || File.exist?(path + ".mp4")

    puts "download start: #{url} - #{path}"
    @downloads[path] = "start"

    return download_with_curl(url, path) if @usecurl

    download_with_eventmachine(url, path, size)
  rescue => ex
    p ex
    @downloads[path] = "error"
    false
  end

  # Converts the downloaded file to "<value>.mp4" with ffmpeg and removes the
  # source only when ffmpeg reports success.
  def convert(value)
    # Argument vector form: the path is never interpreted by a shell.
    args = ["ffmpeg", "-i", value, "-vcodec", "mpeg4", "-r", "23.976",
            "-b", "600k", "-ac", "2", "-ar", "44100", "-ab", "128k",
            "-strict", "experimental", "#{value}.mp4"]
    puts args.join(" ")
    remove_file value if system(*args)
  ensure
    # Balance the increment done by #process; without it the crawler never
    # becomes idle after a conversion job.
    @fetching -= 1
  end

  # Ensures the title directory exists and returns the .flv path inside it.
  def mkfilepath(title, episode)
    mkdirectory title
    "#{DOWNLOADDIR}/#{title}/#{episode}.flv"
  end

  # Creates DOWNLOADDIR/title when missing; failures are reported, not raised.
  def mkdirectory(title)
    dir = "#{DOWNLOADDIR}/#{title}"
    Dir.mkdir dir unless File.directory?(dir)
  rescue SystemCallError => ex
    puts "mkdir failed: #{dir} #{ex.message}"
  end

  private

  # Fetches url through the reactor, parses the body with Nokogiri and hands
  # the document to the block. @fetching is decremented exactly once, whether
  # the request fails, the block raises, or everything succeeds.
  def fetch_page(url, &handler)
    begin
      req = EM::HttpRequest.new(url,:connect_timeout => 50).get
    rescue => ex
      puts "request error: #{url} #{ex.inspect}"
      @fetching -= 1
      return
    end

    req.errback { @fetching -= 1 }

    req.callback do
      begin
        handler.call Nokogiri::HTML(req.response)
      rescue => ex
        # One malformed page must not take the whole reactor down.
        puts "crawl error: #{url} #{ex.inspect}"
      ensure
        @fetching -= 1
      end
    end
  end

  # Returns the value of attribute name on node, or nil when it is absent.
  def attr_value(node, name)
    attr = node.attributes[name]
    attr && attr.value
  end

  # Scans the player script of a video page and downloads the first source
  # that resolves to a URL passing #checkvideourl.
  def download_from_page(page, path)
    page.css("script[type='text/javascript']").each do |script|
      source = script.children[0]
      next unless source && source.to_s =~ /MukioPlayerURI/

      source.to_s.gsub("\n","").split(";").each do |line|
        next unless line =~ /addVideo/

        url = resolve_video_url(line)
        puts "#{url} - #{line}" if @debug
        next unless url

        size = checkvideourl(url)
        next unless size

        downloadvideo url, path, size
        # Stop scanning entirely so no second download targets the same path.
        return
      end
    end
  end

  # Turns one addVideo(...) script line into a downloadable URL.
  # Returns false for unsupported types ("youtube", "qq", anything else) or
  # when the URL cannot be resolved.
  def resolve_video_url(line)
    case line[/type=(.*?)&/, 1]
    when "fc2"
      vid = line[/vid=(.*?)&/, 1]
      return false unless vid

      resolver = "https://www.nosub.tv/wp-content/plugins/mukiopress//lianyue/?/fc2/#{vid}"
      # URI#open comes from open-uri; Kernel#open no longer accepts URLs.
      URI.parse(resolver).open { |res| res.read }
    when "video"
      file = line[/file=(.*?)&/, 1]
      return false unless file

      location = HTTPClient.new.get(file).header['Location']
      Array(location).first || false
    else
      false
    end
  rescue => ex
    puts "resolve error: #{ex.inspect}"
    false
  end

  # Launches curl in the background without going through a shell, so quotes
  # or metacharacters in scraped titles and URLs cannot inject commands.
  # NOTE: as before, the entry is marked "complete" as soon as curl has been
  # started; the transfer itself is not tracked.
  def download_with_curl(url, path)
    puts "curl -# -L -R -o '#{path}' '#{url}' &"
    Process.detach(spawn("curl", "-#", "-L", "-R", "-o", path, url))
    @downloads[path] = "complete"
    nil
  end

  # Streams url into path through the reactor, updating @downloads with the
  # progress. Always returns false because the transfer finishes later.
  def download_with_eventmachine(url, path, size)
    downloaded = 0
    file = File.open(path, "w+b")
    begin
      http = EM::HttpRequest.new(url,:connect_timeout => 50)
        .get({:redirects => 10,:head => {"accept-encoding" => "gzip, compressed"}})
    rescue
      file.close
      remove_file path
      raise
    end
    # Counted only once the request really exists, so failures cannot leak.
    @fetching += 1

    http.errback {|client|
      @downloads[path] = "error"
      p "download error: #{path} #{client.inspect}";
      file.close
      @fetching -= 1
      remove_file path
    }

    http.callback {
      file.close
      status = http.response_header.status
      if status == 200
        @downloads[path] = "complete"
        puts "download complete: #{path} "
        @queue.push({kind: JOB_CONVERT,value: path}) if @ffmpeg
      else
        # The body is an error page, not a video: discard it.
        puts "failed with response code #{status}"
        @downloads[path] = "error"
        remove_file path
      end
      @fetching -= 1
    }

    http.headers do |hash|
      p [:headers, hash]
    end

    http.stream do |chunk|
      downloaded += chunk.length
      puts "#{File.basename path} : #{chunk.length}" if @debug
      if size > 0
        @downloads[path] = "download #{(downloaded/MEGA).round}M / #{(size.to_f/MEGA).round}M #{(downloaded.to_f / size.to_f * 100.0).round(2)}%"
      end
      file.write chunk
    end

    false
  end

  # Deletes path if present; a failure to delete is reported, not raised.
  def remove_file(path)
    File.delete(path) if File.exist?(path)
  rescue SystemCallError => ex
    puts "remove failed: #{path} #{ex.message}"
  end
end

# 高速なサーバーならmp4に変換しておくほうがよいでしょう
Crawler.new(ffmpeg: false,debug: false,usecurl: true).run

2014-01-03

rubyのEnumerableつかって全て回す

ruby

あけましておめでとうございます。
今年もよろしくお願いします。
年末年始にjavaでもやるかと言っていたのに蓋を開けてみればrubyのリファレンスを
読んでいます。
rubyといえばeachですね！
クラス内のアトリビュートを数え上げたい場合は
include Enumerable+eachメソッドの定義一発で良いですが、数え上げたい対象が
複数あってすべてやりたい場合はどうするの？ということで適当に書いたら普通に
動いてビビりました。rubyはやっぱり気持ち良いですね。

#! /usr/bin/env ruby

# Demonstrates enumerating several internal collections through a single
# Enumerable#each implementation.
class Hoge
  include Enumerable

  def initialize
    @a = [0,1,2,3]
    @b = [:a,:b,:c]
    # %w keeps the double quotes as part of each word; the sample output in
    # the post shows them, so they are intentionally left in place.
    @c = %w("あ" "け" "ま" "し" "て" "お" "め" "で" "と" "う" "ご" "ざ" "い" "ま" "す")
  end

  # Yields every element of @a, then @b, then @c.
  #
  # Returns an Enumerator when called without a block instead of raising
  # LocalJumpError, which is the usual contract for #each.
  def each(&block)
    return enum_for(:each) unless block_given?

    @a.each(&block)
    @b.each(&block)
    @c.each(&block)
  end
end

# Print every element produced by Hoge#each, separated by spaces.
hoge = Hoge.new
hoge.each do |item|
  print "#{item} "
end

出力
% ruby ./hoge.rb                                                           
0 1 2 3 a b c "あ" "け" "ま" "し" "て" "お" "め" "で" "と" "う" "ご" "ざ" "い" "ま" "す"

2013-12-06

railsの正規化

SQLアンチパターン

作者: Bill Karwin,和田卓人(監訳),和田省二(監訳),児島修
出版社/メーカー: オライリージャパン
発売日: 2013/01/26
メディア: 大型本
購入: 9人クリック: 698回
この商品を含むブログ (35件) を見る

これ、名著です。読んでいて苦い記憶がうわーってなったりすごく納得したり。何より読みやすいのが良いです。DBに関連する領域の人は必読と思います。

さて、表題の件、自分のrailsプロジェクトを正規化したいなぁーとおもいまして。

statusテーブル
id name
1  hoge
2  fuga
issueテーブル
id name status_id
1  hoge  1

とかやりたいわけです。statusを文字列で持ちたくないし、statusの一覧をサクッと列挙したい。
redmineのソース読んだらそのものズバリがありました。

# An issue whose status is stored as a reference to a row of the statuses
# table instead of as a string.
class Issue < ActiveRecord::Base
  # issues.status_id holds the id of the associated Status record.
  belongs_to(:status, foreign_key: :status_id)
end

でOK。簡単ですね。もっとrails使いこなしたいものです。
使い方は

[root@xxxx xxxx]# rails c
Loading development environment (Rails 4.0.1)
irb(main):002:0> i = Issue.find 1
  Issue Load (0.2ms)  SELECT `issues`.* FROM `issues` WHERE `issues`.`id` = 1 LIMIT 1
=> #<Issue id: 1, name: "hoge", status_id: 2, created_at: "2013-12-05 15:08:16", updated_at: "2013-12-05 15:09:34">
irb(main):003:0> s = Status.create!(name: "50%")
   (0.1ms)  BEGIN
  SQL (0.2ms)  INSERT INTO `statuses` (`created_at`, `name`, `updated_at`) VALUES ('2013-12-05 15:18:07', '50%', '2013-12-05 15:18:07')
   (40.0ms)  COMMIT
=> #<Status id: 3, name: "50%", created_at: "2013-12-05 15:18:07", updated_at: "2013-12-05 15:18:07">
irb(main):004:0> i.status
  Status Load (0.3ms)  SELECT `statuses`.* FROM `statuses` WHERE `statuses`.`id` = 2 ORDER BY `statuses`.`id` ASC LIMIT 1
=> #<Status id: 2, name: "100%", created_at: "2013-12-05 15:09:08", updated_at: "2013-12-05 15:09:08">
irb(main):005:0> i.status = s
=> #<Status id: 3, name: "50%", created_at: "2013-12-05 15:18:07", updated_at: "2013-12-05 15:18:07">
irb(main):006:0> i.save
   (0.1ms)  BEGIN
  SQL (0.2ms)  UPDATE `issues` SET `status_id` = 3, `updated_at` = '2013-12-05 15:18:29' WHERE `issues`.`id` = 1
   (29.6ms)  COMMIT
=> true

ですです。では。

2013-09-25

fuelphp事始め

php

mod_rewrite

public/.htaccessを変更すること(index.php排除のため)

RewriteBase /path/to/fuelphp

nginxで横にプロジェクトが並びまくる弊社環境の場合、いちいちrewriteやってられないので
ダサいけどindex.phpは必須。config.phpをいじって対応になりそう。この辺り

	/**
	 * index_file - The name of the main bootstrap file.
	 *
	 * Set this to 'index.php' if you don't use URL rewriting
	 */
   'index_file' => false,

ですね。

controller

最小構成のcontroller

/**
 * Minimal controller: renders the "hoge" view with a single variable.
 */
class Controller_Hello extends Controller
{
    /**
     * Builds the view data and returns the forged "hoge" view.
     *
     * @return mixed the object returned by View::forge()
     */
    public function action_index()
    {
        //return "hello world";
        $data = array();
        // The terminating semicolon was missing here, which is a parse error.
        $data['hoge'] = 'aaaaa';
        $view = View::forge('hoge', $data);
        return $view;
    }
}

viewは
@fuel/app/views/hoge.php
で

上位プログラムのログイン情報とか

今回のプロジェクトは既存サイトの配下にありますね。
before()とか使ってそこでなんとかすれば良さそう。

ごりっっとrouter()

とか書いて必要な機能を実行させるのも面白そう。
されど。。それはコントローラー層なのだろうか？model層で何とかすべきかもとは思う。

2013-09-03

base64エンコードサンプル

ruby

#!/usr/bin/env ruby

# Sample: once a JPEG file has been produced, embed it into an HTML page
# as a base64 data URI.

## Assume Ruby has already created a file named a.jpg.
# Read in binary mode so the image bytes are never newline-translated or
# transcoded as text.
jpg = File.binread("a.jpg")

# Base64-encode. 'm0' emits a single line; plain 'm' inserts a newline every
# 60 characters, which would end up inside the src attribute.
jpg = [jpg].pack('m0')

## Write the HTML file.
File.open("a.html", "w") { |io|
 io.write "<img src='data:image/jpeg;base64,#{jpg}'/>"
}
# Open the result (presumably via the macOS `open` command — confirm on
# other platforms).
spawn "open a.html"

2013-09-03

新しいmysql環境から古いmysqlに接続する

これって、何したら接続できるんでしたっけ？
mysqlnd cannot connect to MySQL 4.1+ using the old insecure authentication.

-- Turn off the old_passwords setting for this session before re-hashing.
-- NOTE(review): presumably this makes PASSWORD() produce the newer hash
-- format that the mysqlnd error quoted above asks for — confirm against the
-- server's MySQL version.
SET SESSION old_passwords=0;
-- Re-set the account's password so it is stored again under the session
-- setting chosen above; 'user', 'localhost' and 'xxxxx' are placeholders.
SET PASSWORD FOR 'user'@'localhost' = PASSWORD('xxxxx');