Tools Used

        

Genetics Book Data Scraper

import scrapy


class QuotesSpider(scrapy.Spider):
    """Scrape the ``brugada.molgen.TA`` table from NCBI Bookshelf page NBK1517.

    One item is yielded per ``<tr>`` in the table body. Each item carries the
    anchor text and ``href`` of the first five cells under the keys ``gene``,
    ``chromosome``, ``protein``, ``locous`` and ``hgdm`` (plus ``*_link``).
    """

    name = "quotes"
    start_urls = [
        'https://www.ncbi.nlm.nih.gov/books/NBK1517/#brugada.molgen.TA',
    ]

    def parse(self, response):
        """Yield one dict per row of the large-table ``<div>``.

        Args:
            response: the Scrapy response for the page in ``start_urls``.

        Yields:
            dict: text/href values extracted from the row; any value is
            ``None`` when the corresponding anchor is missing from the cell.
        """
        table = response.xpath('//div[@id="__brugada.molgen.TA_lrgtbl__"]')
        for row in table.xpath('table/tbody/tr'):
            gene_href = row.xpath('td[1]/a/@href').extract_first()
            # Only prefix the site root when an href was actually found;
            # str(None) would otherwise produce the bogus URL "...govNone".
            if gene_href is None:
                gene_link = None
            else:
                gene_link = 'https://www.ncbi.nlm.nih.gov' + gene_href

            yield {
                'gene': row.xpath('td[1]/a/i/text()').extract_first(),
                'gene_link': gene_link,
                'chromosome': row.xpath('td[2]/a/text()').extract_first(),
                # The remaining links are emitted exactly as found in the page.
                'chromosome_link': row.xpath('td[2]/a/@href').extract_first(),
                'protein': row.xpath('td[3]/a/text()').extract_first(),
                'protein_link': row.xpath('td[3]/a/@href').extract_first(),
                'locous': row.xpath('td[4]/a/text()').extract_first(),
                'hgdm': row.xpath('td[5]/a/text()').extract_first(),
                'hgdm_link': row.xpath('td[5]/a/@href').extract_first(),
            }
        

Brugada PubMed

import scrapy
from scrapy.spiders import XMLFeedSpider

# Module-level accumulator of every PubMed ID seen by ``Brugadapubmed.parse``.
# The name shadows the ``id`` builtin; it is kept unchanged so that anything
# importing it from this module keeps working.
id = []


class Brugadapubmed(XMLFeedSpider):
    """Fetch PubMed records matching "brugada syndrome review" via E-utilities.

    Step 1 (``parse``): read the ``<Id>`` elements of the esearch response.
    Step 2 (``parse_page2``): read the efetch XML for those IDs and yield one
    item per ``<PubmedArticle>``.
    """

    name = "brugadapubmed"
    start_urls = [
        'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term=brugada+syndrome+review&usehistory=y&retmax=657',
    ]

    def parse(self, response):
        """Turn the esearch ID list into a single efetch request.

        Args:
            response: the esearch XML response.

        Returns:
            scrapy.Request | None: a GET request for the efetch URL carrying
            all IDs comma-separated, or ``None`` when the search returned no
            IDs (an empty ``id=`` parameter would only produce an error page).
        """
        pmids = response.selector.xpath('.//Id/text()').extract()
        id.extend(pmids)
        if not pmids:
            self.logger.warning("esearch returned no PubMed IDs for %s", response.url)
            return None

        url = (
            "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?"
            + 'db=pubmed'
            + '&id=' + ','.join(pmids)
            + '&retmode=xml'
        )
        return scrapy.Request(url, callback=self.parse_page2, method='GET')

    def parse_page2(self, response):
        """Yield one item per ``<PubmedArticle>`` in the efetch response.

        Args:
            response: the efetch XML response.

        Yields:
            dict: lists of text nodes under the keys ``Article Id``,
            ``Article Title``, ``Author`` and ``Abstract``.
        """
        print('In parse method')
        for article in response.selector.xpath('.//PubmedArticle'):
            yield {
                'Article Id': article.xpath('.//PMID/text()').extract(),
                'Article Title': article.xpath('.//Article//ArticleTitle/text()').extract(),
                # AuthorList is a child of Article, not of ArticleTitle; the
                # previous path through ArticleTitle could never match.
                'Author': article.xpath('.//Article//AuthorList//Author//ForeName/text()').extract(),
                'Abstract': article.xpath('.//Article//Abstract//AbstractText/text()').extract(),
            }
        

NCBI FASTA Work

"""Apply one random point mutation to the SCN5A protein and BLAST the result.

Reads ``scn5a.fasta``, picks a random entry from column index 2 of the first
sheet of the ``snp_export`` workbook, substitutes the residue if the reference
residue matches, writes ``mutated_scn5a.fasta`` and submits it to NCBI
``tblastn`` against ``nt``.
"""
import random
import re

import xlrd
from Bio import SeqIO
# NOTE(review): Bio.Alphabet and Seq.tomutable() only exist in older Biopython
# releases; this script keeps the API the file was written against.
from Bio.Alphabet import generic_protein
from Bio.Blast import NCBIWWW
from Bio.Seq import Seq  # was missing: Seq(...) below raised NameError
from Bio.SeqRecord import SeqRecord

# <ref residue><position><optional separator><new residue>, e.g. "R27H".
# NOTE(review): assumes single-letter residue codes in the spreadsheet column;
# confirm against the real contents of snp_export.
_MUTATION_RE = re.compile(r'^\s*([A-Za-z])(\d+)[^A-Za-z0-9]*([A-Za-z])\s*$')


def _parse_mutation(cell):
    """Return ``(ref, index, alt)`` for a mutation cell, or ``None`` if unparseable.

    ``index`` is the 0-based sequence index.
    NOTE(review): positions are treated as 1-based (the usual convention for
    protein variants) — confirm this matches the export.
    """
    if not isinstance(cell, str):
        return None
    match = _MUTATION_RE.match(cell)
    if match is None:
        return None
    ref, position, alt = match.groups()
    position = int(position)
    if position < 1:
        return None
    return ref, position - 1, alt


mut = []

# As in the original loop, the last record of the FASTA file is the one used.
records = list(SeqIO.parse("scn5a.fasta", "fasta"))
if not records:
    raise SystemExit("scn5a.fasta contains no FASTA records")
seq_record = records[-1]
m_seq = seq_record.seq.tomutable()

sh = xlrd.open_workbook('snp_export').sheet_by_index(0)
for rownum in range(sh.nrows):
    mut.append(sh.cell(rownum, 2).value)

# Skip header/blank/non-text cells instead of crashing on them.
candidates = [parsed for parsed in map(_parse_mutation, mut) if parsed is not None]
if not candidates:
    raise SystemExit("no usable mutations found in column 2 of snp_export")

ref, index, alt = random.choice(candidates)
# Substitute only when the sequence really has the reference residue there.
if index < len(m_seq) and m_seq[index] == ref:
    m_seq[index] = alt

# Debug output carried over unchanged; the index 125 is hard-coded.
print(m_seq[125])

rec = SeqRecord(
    Seq(str(m_seq), generic_protein),
    id=seq_record.id,
    description=seq_record.description,
)
SeqIO.write(rec, "mutated_scn5a.fasta", "fasta")

with open("mutated_scn5a.fasta") as fasta_file:
    fasta_string = fasta_file.read()

result_handle = NCBIWWW.qblast("tblastn", "nt", fasta_string)
# Print the BLAST output itself rather than the repr of the handle object.
print(result_handle.read())