Demo entry 6324612

d2.py

   

Submitted by anonymous on Nov 20, 2016 at 20:26
Language: Python. Code size: 1.4 kB.

import xml.etree.ElementTree as ET
import re

def findWholeWord(w):
    """Return a search function that finds *w* as a whole word.

    The match is case-insensitive and anchored on word boundaries, so
    "gene" matches "Gene" but not "genes" or "generic".

    *w* is treated as literal text: regex metacharacters in it are
    escaped so they cannot change the meaning of the pattern.

    The returned callable takes a string and returns a match object
    (group 1 is the matched word) or None when the word is absent.
    """
    pattern = re.compile(
        r'\b({0})\b'.format(re.escape(w)), flags=re.IGNORECASE)
    return pattern.search

# Find the 'PMID' and 'ArticleTitle' of every article in the database
# that has more than one author and where the word 'gene' occurs in
# either the ArticleTitle or the Abstract.


def _elementText(element):
    """Return all text inside *element* (nested markup included), or ''."""
    if element is None:
        return ''
    return ''.join(element.itertext())


# Build the matcher once instead of recompiling it for every citation.
geneMatcher = findWholeWord("gene")

tree = ET.parse('medsamp2014.xml')
root = tree.getroot()

medLineCitationSet = root.findall('./MedlineCitation')
for medLineCitation in medLineCitationSet:
    pmid = medLineCitation.find('./PMID')
    article = medLineCitation.find('./Article')
    if article is None:
        # Without an Article there is no title, author list or abstract.
        continue

    articleTitle = article.find('./ArticleTitle')
    authorList = article.find('./AuthorList')
    abstract = article.find('./Abstract')

    atleastTwoAuthors = (
        authorList is not None
        and len(authorList.findall('./Author')) > 1
    )

    titleText = _elementText(articleTitle)
    containsGene = bool(geneMatcher(titleText))

    if not containsGene and abstract is not None:
        # Search each AbstractText section; the Abstract container's own
        # .text is only the whitespace before its first child (or None).
        containsGene = any(
            geneMatcher(_elementText(abstractText))
            for abstractText in abstract.findall('./AbstractText')
        )

    if atleastTwoAuthors and containsGene:
        # Single-argument print calls produce the same output under the
        # Python 2 print statement and the Python 3 print function.
        print('PMID:\n' + _elementText(pmid))
        print('ARTICLE TITLE:\n' + titleText)
        print('')

This snippet took 0.01 seconds to highlight.

Back to the Entry List or Home.

Delete this entry (admin only).