Bioinformaticsのお勉強: Agi4x44Preprocessをいじり倒す！！！！！！！

$ R

source("http://bioconductor.org/biocLite.R")
biocLite("Agi4x44PreProcess")

source("http://bioconductor.org/biocLite.R")
biocLite("hgug4112a.db")

-----大体の流れ-------

The pre-processing steps implemented in the package are the following:

1.- read the target file
2.- read the array samples
3.- Background correction
and Normalization between samples
4.- Filtering Probes by Quality Flag
5.- Summarizing of Replicated Probes
6.- Creating and ExpressionSet object with the processed data

-----ここまで--------

library(Agi4x44PreProcess)
library(hgug4112a.db)

data(targets)
targets
       FileName Treatment GErep Subject Array
Ast     Ast.txt        st     1       A     1
Bst     Bst.txt        st     1       B     1
Aunst Aunst.txt      unst     2       A     1
Bunst Bunst.txt      unst     2       B     1

#自分のデータで行うときは以下のようにする。

targets <- read.targets(infile = "targets.txt")
library(Agi4x44PreProcess)
targets
       FileName Treatment GErep Subject Array
A1     A2.txt      chemA     1       A     1
B1     B2.txt      chemB     1       B     1
A2     A2.txt      chemA     2       A     1
B2     B2.txt      chemB     2       B     1

#ここまで

dd(dd)
dd
An object of class "RGList"
$R
          Ast      Bst    Aunst    Bunst
[1,] 231000.0 2.94e+05 2.97e+05 2.10e+05
[2,]     12.6 8.10e+00 9.87e+00 7.38e+00
[3,]     12.6 8.07e+00 9.82e+00 7.33e+00
[4,]     12.6 8.05e+00 9.79e+00 7.28e+00
[5,]     12.6 8.02e+00 9.75e+00 7.24e+00
12010 more rows ...

$G
          Ast      Bst    Aunst    Bunst
[1,] 201000.0 254000.0 255000.0 169000.0
[2,]     50.4     76.5     60.8     66.2
[3,]     54.5     78.6     61.3     57.9
[4,]     54.4     73.6     58.7     53.4
[5,]     53.9     76.4     62.1     56.5
12010 more rows ...

$Rb
     Ast Bst Aunst Bunst
[1,] 45 54    53 46.0
[2,] 45 55    55 46.0
[3,] 44 54    54 47.0
[4,] 44 54    54 46.0
[5,] 44 54    53 46.5
12010 more rows ...

$Gb
         Ast     Bst   Aunst   Bunst
[1,] 68.9818 77.4722 68.5002 67.3098
[2,] 68.5172 77.4488 68.2706 67.0565
[3,] 67.9893 77.4727 68.0414 66.7981
[4,] 67.5123 77.4587 67.7860 66.5539
[5,] 67.0601 77.4210 67.5620 66.3154
12010 more rows ...

$targets
       FileName
Ast     Ast.txt
Bst     Bst.txt
Aunst Aunst.txt
Bunst Bunst.txt

$genes
Sequence ControlType       ProbeName        GeneName SystematicName
1                    1 GE_BrightCorner GE_BrightCorner GE_BrightCorner
2                    1      DarkCorner      DarkCorner      DarkCorner
3                    1      DarkCorner      DarkCorner      DarkCorner
4                    1      DarkCorner      DarkCorner      DarkCorner
5                    1      DarkCorner      DarkCorner      DarkCorner
Description
1
2
3
4
5
12010 more rows ...

$other
$gIsFound
     Ast Bst Aunst Bunst
[1,]   1   1     1     1
[2,]   1   1     1     1
[3,]   1   1     1     1
[4,]   1   1     1     1
[5,]   1   1     1     1
12010 more rows ...

$gIsWellAboveBG
     Ast Bst Aunst Bunst
[1,]   1   1     1     1
[2,]   0   0     0     0
[3,]   0   0     0     0
[4,]   0   0     0     0
[5,]   0   0     0     0
12010 more rows ...

$gIsSaturated
     Ast Bst Aunst Bunst
[1,]   0   0     0     0
[2,]   0   0     0     0
[3,]   0   0     0     0
[4,]   0   0     0     0
[5,]   0   0     0     0
12010 more rows ...

$gIsFeatNonUnifOL
     Ast Bst Aunst Bunst
[1,]   0   0     0     0
[2,]   0   0     0     0
[3,]   0   0     0     0
[4,]   0   0     0     0
[5,]   0   0     0     0
12010 more rows ...

$gIsFeatPopnOL
     Ast Bst Aunst Bunst
[1,]   0   0     0     0
[2,]   0   0     0     1
[3,]   0   0     0     0
[4,]   0   0     0     0
[5,]   0   0     0     0
12010 more rows ...

$chr_coord
     Ast Bst Aunst Bunst
[1,] " " " " " "   " "
[2,] " " " " " "   " "
[3,] " " " " " "   " "
[4,] " " " " " "   " "
[5,] " " " " " "   " "
12010 more rows ...

#自分のデータでこれをやるなら

dd <- read.AgilentFE(targets, makePLOT = FALSE) #TRUEだとプロットしてくれるよ☆

#ここまで。

#RGListオブジェクトの各項目の説明は以下。

variable
dd$R:gProcessedSignal
dd$G:gMeanSignal
dd$Rb:gBGMedianSignal
dd$Gb:gBGUsed
dd$targets:File names
dd$genes$ProbeName:Probe Name
dd$genes$GeneName:Gene Name
dd$genes$SystematicName:Systematic Name
dd$genes$Description:Description Name
dd$genes$Sequence:60 bases Sequence
dd$genes$ControlType:FLAG to specify the sort of feature
dd$other$gIsWellAboveBG:FLAG IsWellAboveBG
dd$other$gIsFound:FLAG IsFound
dd$other$gIsSaturated:FLAG IsSaturated
dd$other$gIsFeatPopnOL:FLAG IsFeatPopnOL
dd$other$gIsFeatNonUnifOL:FLAG IsFeatNonUnifOL
dd$other$chr coord:CHR coordinate from Agilent data files

#ここまで

#ddの発現値に関して箱ヒゲ図を描く。ただし、大文字が二つ入っていることに注意。

png("BoxPlot.1.png")
BoxPlot(log2(dd$R), "ProcessedSignal", "red", xlab = "Samples", ylab = "expression")
dev.off()

png("plotDensity.1.png")
plotDensity(log2(dd$R), "ProcessedSignal")
dev.off()

#normalizationします☆

ddNORM = BGandNorm(dd, BGmethod = "half", NORMmethod = "quantile",foreground = "MeanSignal", background = "BGMedianSignal",offset = 50, makePLOTpre = FALSE, makePLOTpost = FALSE)

#TRUEにすると図が描写されるがまだ勉強不足。

ddNORM = BGandNorm(dd, BGmethod = "half", NORMmethod = "quantile",foreground = "MeanSignal", background = "BGMedianSignal",offset = 50, makePLOTpre = T, makePLOTpost = T)

#フラッグによりフィルタリングします☆

ddFILT = filter.probes(ddNORM, control = TRUE, wellaboveBG = TRUE,isfound = TRUE, wellaboveNEG = TRUE, sat = TRUE, PopnOL = TRUE,NonUnifOL = T, nas = TRUE, limWellAbove = 75,limISF = 75,limNEG = 75, limSAT = 75, limPopnOL = 75, limNonUnifOL = 75,limNAS = 100, makePLOT = F, annotation.package = "hgug4112a.db",flag.counts = T, targets)

#まとめをつくります。コントロールプローブを除きます☆

ddPROC = summarize.probe(ddFILT, makePLOT = FALSE, targets)

#非常に多くのメソッドが使うことができるExpressionSetオブジェクトを生成してみる！！！！

esetPROC = build.eset(ddPROC, targets, makePLOT = FALSE, annotation.package = "hgug4112a.db")

class(esetPROC)
[1] "ExpressionSet"　　#ｷﾀｧ━ﾟ+.(○・艸)(艸・●)ﾟ+.━!!
attr(,"package")
[1] "Biobase"

png("110208_heatmap.png")
HeatMap(exprs(esetPROC), size = 100, "100 High Var genes")
dev.off()

Bioinformaticsのお勉強

Agi4x44Preprocessをいじり倒す！！！！！！！

自己紹介

過去のブログ♪♪♪