EMBOSSを使った配列の読み込みと、clustalwを使ったマルチプルアライメント

$ vi prion_names.txt

a (viの挿入モードに入り、以下の行を入力する)

--ここから--

swissprot:PRIO_HUMAN
swissprot:PRIO_RAT
swissprot:PRIO_MOUSE
swissprot:PRIO_CALJA
swissprot:PRIO_CANFA
swissprot:PRIO_BOVIN
swissprot:PRIO_AILME
swissprot:PRIO_CAPHI
swissprot:PRIO_SHEEP
swissprot:PRIO_CHICK

--ここまで--

Esc

:wq

$ seqret @prion_names.txt -outseq=prion.fasta
$ grep ">" prion.fasta | cut -f1,2 -d" "
>PRIO_HUMAN P04156
>PRIO_RAT P13852
>PRIO_MOUSE P04925
>PRIO_CALJA P40247
>PRIO_CANFA O46501
>PRIO_BOVIN P10279
>PRIO_AILME Q6EH52
>PRIO_CAPHI P52113
>PRIO_SHEEP P23907
>PRIO_CHICK P27177

$ sudo apt-get install clustalw

$ clustalw prion.fasta


kappa@kappa-desktop:~/2011$ more prion.aln
CLUSTAL 2.0.10 multiple sequence alignment


PRIO_HUMAN      --MANLGCWMLVLFVATWSDLGLCKK-RPKP-GG-WNTGGSR---YPGQ-GSPGGNRYPP
PRIO_CALJA      --MANLGCWMLFLFVATWSDLGLCKK-RPKP-GG-WNTGGSR---YPGQ-GSPGGNRYPP
PRIO_RAT        --MANLGYWLLALFVTTCTDVGLCKK-RPKP-GG-WNTGGSR---YPGQ-GSPGGNRYPP
PRIO_MOUSE      --MANLGYWLLALFVTMWTDVGLCKK-RPKP-GG-WNTGGSR---YPGQ-GSPGGNRYPP
PRIO_BOVIN      MVKSHIGSWILVLFVAMWSDVGLCKK-RPKP-GGGWNTGGSR---YPGQ-GSPGGNRYPP
PRIO_AILME      MVKSHIGSWILVLFVAMWSDVGLCKK-RPKP-GGGWNTGGSR---YPGP-GSPGGNRYPP
PRIO_CAPHI      MVKSHIGSWILVLFVAMWSDVGLCKK-RPKP-GGGWNTGGSR---YPGQ-GSPGGNRYPP
PRIO_SHEEP      MVKSHIGSWILVLFVAMWSDVGLCKK-RPKP-GGGWNTGGSR---YPGQ-GSPGGNRYPP
PRIO_CANFA      MVKSHIGSWILVLFVAMWSDVGLCKK-RPKP-GGGWNTGGSR---YPGQ-GSPGGNRYPP
PRIO_CHICK      MARLLTTCCLLALLLAACTDVALSKKGKGKPSGGGWGAGSHRQPSYPRQPGYPHNPGYPH
                         :* *:::  :*:.*.** : ** ** *.:*. *   **   * * .  **

PRIO_HUMAN      QGGGGWGQPHGGGWGQPHGGGWGQ----PHGG--------GWGQPHGGG-WGQGGGTHSQ
PRIO_CALJA      QGGG-WGQPHGGGWGQPHGGGWGQ----PHGG--------GWGQPHGGG-WGQGGGTHSQ
PRIO_RAT        QSGGTWGQPHGGGWGQPHGGGWGQ----PHGG--------GWGQPHGGG-WSQGGGTHNQ
PRIO_MOUSE      Q-GGTWGQPHGGGWGQPHGGSWGQ----PHGG--------SWGQPHGGG-WGQGGGTHNQ
PRIO_BOVIN      QGGGGWGQPHGGGWGQPHGGGWGQ----PHGGGWGQPHGGGWGQPHGGGGWGQGG-THGQ
PRIO_AILME      QGGGGWGQPHGGGWGQPHGGGWGQ----PHGGGWGQPHGGGWGQPHGGGGWGQGG-THGQ
PRIO_CAPHI      QGGGGWGQPHGGGWGQPHGGGWGQ----PHGG--------GWGQPHGGGGWGQGG-SHSQ
PRIO_SHEEP      QGGGGWGQPHGGGWGQPHGGGWGQ----PHGG--------GWGQPHGGGGWGQGG-SHSQ
PRIO_CANFA      QGGGGWGQPHGGGWGQPHGGGWGQ----PHGG--------GWGQPHGGGGWGQGG-THSQ
PRIO_CHICK      NPGYPHNPGYPHNPGYPHNPGYPQNPGYPHNPG-----YPGWGQGYNPS---SGGSYHNQ
                : *   .  :  . * **. .: *    **.         .*** :. .   .**  *.*

PRIO_HUMAN      WNKPSKP-KTNMKHMAGAAAAGAVVGGLGGYMLGSAMSRPIIHFGSDYEDRYYRENMHRY
PRIO_CALJA      WNKPSKP-KTNMKHVAGAAAAGAVVGGLGGYMLGSAMSRPLIHFGNDYEDRYYRENMYRY
PRIO_RAT        WNKPSKP-KTNLKHVAGAAAAGAVVGGLGGYMLGSAMSRPMLHFGNDWEDRYYRENMYRY
PRIO_MOUSE      WNKPSKP-KTNLKHVAGAAAAGAVVGGLGGYMLGSAMSRPMIHFGNDWEDRYYRENMYRY
PRIO_BOVIN      WNKPSKP-KTNMKHVAGAAAAGAVVGGLGGYMLGSAMSRPLIHFGSDYEDRYYRENMHRY
PRIO_AILME      WNKPSKP-KTNMKHVAGAAAAGAVVGGLGGYMLGSAMSRPLIHFGSDYEDRYYRENMHRY
PRIO_CAPHI      WNKPSKP-KTNMKHVAGAAAAGAVVGGLGGYMLGSAMSRPLIHFGNDYEDRYYRENMYRY
PRIO_SHEEP      WNKPSKP-KTNMKHVAGAAAAGAVVGGLGGYMLGSAMSRPLIHFGNDYEDRYYRENMYRY
PRIO_CANFA      WNKPSKP-KTNMKHVAGAAAAGAVVGGLGGYLLGSAMSRPLIHFGNDCEDRYYRENMYRY
PRIO_CHICK      --KPWKPPKTNFKHVAGAAAAGAVVGGLGGYAMGRVMSGMNYHFDSPDEYRWWSENSARY
                  ** ** ***:**:**************** :* .**    **..  * *:: **  **

PRIO_HUMAN      PNQVYYRPMDEYSNQNNFVHDCVNITIKQHTV-TTTTKGENFTETDVKMMERVVEQMCIT
PRIO_CALJA      PNQVYYRPVDQYNNQNNFVHDCVNITIKQHTV-TTTTKGENFTETDVKMMERVVEQMCIT
PRIO_RAT        PNQVYYRPVDQYSNQNNFVHDCVNITIKQHTV-TTTTKGENFTETDVKMMERVVEQMCVT
PRIO_MOUSE      PNQVYYRPVDQYSNQNNFVHDCVNITIKQHTV-TTTTKGENFTETDVKMMERVVEQMCVT
PRIO_BOVIN      PNQVYYRPVDQYSNQNNFVHDCVNITVKEHTV-TTTTKGENFTETDIKMMERVVEQMCIT
PRIO_AILME      PNQVYYRPVDQYSNQNNFVHDCVNITVKEHTV-TTTTKGENFTETDIKMMERVVEQMCIT
PRIO_CAPHI      PNQVYYRPVDQYSNQNNFVHDCVNITVKQHTV-TTTTKGENFTETDIKIMERVVEQMCIT
PRIO_SHEEP      PNQVYYRPVDRYSNQNNFVHDCVNITVKQHTV-TTTTKGENFTETDIKIMERVVEQMCIT
PRIO_CANFA      PNQVYYRSVDQYNNQSTFVHDCVNITVKQHTV-TTT-KGENFTETDIKMMERVVEQMCIT
PRIO_CHICK      PNRVYYRDYSSPVPQDVFVADCFNITVTEYSIGPAAKKNTSEAVAAANQTEVEMENKVVT
                **:****  .    *. ** **.***:.:::: .:: *. . : :  :  *  :*:  :*

PRIO_HUMAN      QYERES--QAYYQ--RGSSMVLFSSP---PVILLISFLIFLIVG
PRIO_CALJA      QYEKES--QAYYQ--RGSSMVLFSSP---PVILLISFLIFLIVG
PRIO_RAT        QYQKES--QAYYDGRRSS-AVLFSSP---PVILLISFLIFLIVG
PRIO_MOUSE      QYQKES--QAYYDGRRSSSTVLFSSP---PVILLISFLIFLIVG
PRIO_BOVIN      QYQRES--QAYYQ--RGASVILFSSP---PVILLISFLIFLIVG
PRIO_AILME      QYQRES--QAYYQ--RGASVILFSSP---PVILLISFLIFLIVG
PRIO_CAPHI      QYQRES--QAYYQ--RGASVILFSPP---PVILLISFLIFLIVG
PRIO_SHEEP      QYQRES--QAYYQ--RGASVILFSSP---PVILLISFLIFLIVG
PRIO_CANFA      QYQRES--EAYYQ--RGASVILFSSP---PVILLVSFLIFLIVG
PRIO_CHICK      KVIREMCVQQYREYRLASGIQLHPADTWLAVLLLLLTTLFAMH-
                :  :*   : * :   .:   *...    .*:**:   :* : 


#これは美しい!!!!! 今までGUIでclustalwをせっせと実行していたので、驚きが大きい。
#Rで系統樹作成用のパッケージを読み込んで、系統樹を描かせてみる。

$ sudo R
> install.packages("seqinr")
> install.packages("ape", dep=T)
> library(ape)
> library(seqinr)
> prion_align <- read.alignment(file="prion.aln",format="clustal")
> plot(nj(dist.alignment(prion_align)))
> plot(nj(dist.alignment(prion_align)), type = "unrooted")

#驚くほど綺麗な絵になったので保存しておく。

> png("110128_phyl.png")
> par(mfrow=c(1,2))
> plot(nj(dist.alignment(prion_align)))
> plot(nj(dist.alignment(prion_align)), type = "unrooted")
> dev.off()


今日はバイオインフォマティクスって感じのプログラムの勉強ができた☆