% Journal article: Iranian Journal of Animal Science Research 9(4), 2018.
% The *_fa fields carry the Persian title/abstract/keywords; they and eissn are
% non-standard field names, which BibTeX styles ignore unless written to use them.
@article{bakhtiarizadeh2018pseudogenes,
  author      = {Bakhtiarizadeh, Mohammad Reza and Mozdouri, Zohreh and Shakeri, Pedram},
  title       = {Prediction of Novel Pseudogenes in Ovine Reference Genome},
  journal     = {Iranian Journal of Animal Science Research},
  volume      = {9},
  number      = {4},
  pages       = {484--497},
  year        = {2018},
  publisher   = {Ferdowsi University of Mashhad},
  issn        = {2008-3106},
  eissn       = {2423-4001},
  doi         = {10.22067/ijasr.v9i4.60511},
  abstract    = {Introduction Pseudogenes are copies of the ancestral genes which have undergone changes that were constructed based on gene duplications and reverse transcription in the genome. They have been reported in all types of organisms ranging from bacteria to mammals. Pseudogenes increase the genetic diversity of a plethora of genes and they do so through gene conversion and recombination. Three classes of pseudogenes are known to exist: duplicated pseudogenes; processed or retrotransposed pseudogenes; and unitary or disabled pseudogenes. Pseudogenes have long been considered as nonfunctional genomic sequences. However, recent studies reported that many of them might have some form of biological activity. Recently, it has reported that pseudogenes represent a conspicuous part of the human transcriptome and proteome, as thousands of them are transcribed and hundreds are also translated. Also, it has been demonstrated that pseudogenes exert important coding-dependent and coding-independent functions that are involved in complex regulatory networks. Hence, the possibility of functionality of these genes, has increased interest in their accurate annotation. According to the best of our knowledge, there is no available report on the high-throughput pseudogene identification in sheep. Therefore, in the present study, to improve the annotation of sheep genome, we present the first genome-wide pseudogene identification for protein-coding genes using a homology-based computational approach. 
Materials and Methods The pseudogene content in the sheep genome was estimated using an in-house computational annotation pipeline, named PseudoPipe. The PseudoPipe pipeline predicts pseudogenes in the genome using homology-based method (BLAST and a clustering algorithm). In the present study, repeat-masked sheep genome reference (Ovis_aries.Oar_v3.1), genome annotation gtf file (version 77) and all of the protein coding genes sequences were downloaded from ENSEMBL database. To identify pseudogenes, the sheep genome was searched in a comprehensive and consistent manner. The key steps in the pipeline involved using BLAST to rapidly cross-reference potential ‘‘parent’’ proteins against the intergenic regions of the genome and then processing the resulting ‘‘raw hits’’ such as eliminating redundant ones, clustering together neighbors, and associating and aligning clusters with a unique parent. Then, pseudogenes were classified based on a combination of criteria including homology, intron/exon structure, and existence of stop codons and frameshifts. Finally, we investigated the results manually and false positive results were removed. Also, the gene ontology (GO) of the parental genes that pseudogenes derived from them, have been investigate by DAVID software. Furthermore, different characteristics of the identified new candidate pseudogene were compared with known pseudogenes in the human, mice and cattle species. Results and Discussion It is vital to identify pseudogenes to better understand genome annotation and disease-related molecular mechanism. Identification of pseudogenes is an ongoing effort, and there are several groups continuously working on identification of pseudogenes. The complexity of the identification of pseudogenes can be addressed by in silico analysis and using a homology-based whole genome identification approach. 
Here, using a computational method, we identified 4,098 high confidence pseudogenes including 1,102 duplicated and 2,996 processed pseudogenes in sheep genome. The results of the GO analysis showed that identified pseudogenes are significantly enriched in various biological processes, such as mRNA splicing, ribosome structure, binding rRNA, mitochondrial electron transport, translation and etc. Interestingly, a growing body of evidence suggests parental genes of pseudogenes roles are associated with ribosome, rRNA and translational biological processes. Detailed comparison of our results with other species showed that our results are in consistence with previous studies. For example, pseudogene distribution on the sheep chromosomes was in consistence with human and mouse genome. Moreover, it is reported that, duplicated pseudogenes are commonly found on the same chromosome as their parent genes. Our results showed that about 28% of the identified duplicated pseudogenes were on the same chromosome with their parent genes. The results of the study will help to improve the annotation of the sheep genome. The coincidence of the results of this study with previous studies indicates accuracy of the method used in this research. Conclusion This study, for the first time, has generated the catalog of 4,098 sheep putative pseudogenes. Our findings provide an evidence for pseudogene content in sheep which is a starting point for understanding of their regulatory mechanism. The identification of the novel pseudogenes have greatly improved the genome annotation of sheep. The results of this study will help to better annotation of sheep genome. 
By using such methods, we can also improve annotation genomes of various organisms.},
  keywords    = {Genome annotation,Homology,Mismatches,PseudoPipe Software},
  title_fa    = {پیش‌بینی ژن‌های کاذب جدید در ژنوم مرجع گوسفند},
  abstract_fa = {ژن‌های کاذب نسخه‌هایی از ژن اجدادی می‌باشند که به مرور زمان فعالیت آنها نسبت به ژن اولیه تغییر کرده است و در ژنوم بر اثر فرآیندهایی مانند مضاعف شدگی ژنی و همچنین رونویسی واژگون ایجاد شده‌اند. ژن‌های کاذب تا مدت‌ها به‌عنوان توالی‌های غیر عملکردی ژنوم در نظر گرفته می‌شدند. با این وجود پژوهش‌های اخیر گزارشاتی مبنی بر فعالیت زیستی این ژن‌ها ارائه داده‌اند، در نتیجه عملکردی بودن این ژن‌ها موجب افزایش حاشیه نویسی صحیح‌تر این ژن‌ها در ژنوم موجودات شده است. در پژوهش حاضر به منظور بهبود حاشیه نویسی ژنوم گوسفند، برای نخستین بار با استفاده از روش‌های محاسباتی بر پایه بررسی تشابه با استفاده از نرم‌افزار PseudoPipe، ژن‌های کاذب مرتبط با ژن‌های کدکننده پروتئین در سطح ژنوم شناسایی شدند. همچنین گروه‌های کارکردی ژن‌های والدی که ژن‌های کاذب از آنها مشتق شده‌اند با استفاده از پایگاه اینترنتی DAVID بررسی شدند. در نهایت ویژگی‌های مختلف ژن‌های کاذب کاندید جدید شناسایی شده با ژن‌های کاذب شناخته‌شده در گونه‌های انسان، موش و گاو مقایسه شدند. به طور کلی 4098 ژن کاذب با سطح اطمینان بالا شامل 1102 ژن کاذب از نوع مضاعف شده و 2996 از نوع پردازش شده شناسایی شدند. نتایج نشان داد که ژن‌های کاذب شناسایی شده در فرآیندهای زیستی گوناگونی مانند splicing mRNA، پیدایش ریبوزوم، اتصال rRNA، انتقال الکترون میتوکندریایی، ترجمه و غیره نقش دارند. مقایسه ویژگی‌های مختلف ژن‌های شناسایی شده با دیگر گونه‌ها نشان داد که نتایج حاصل از این پژوهش در تطابق با پژوهش‌های گذشته می‌باشد. نتایج حاصل از این پژوهش به بهبود حاشیه نویسی ژنوم گوسفند کمک خواهد کرد.},
  keywords_fa = {تشابه,حاشیه نویسی ژنوم,عدم تطابق,نرم افزار PseudoPipe},
  url         = {https://ijasr.um.ac.ir/article_36060.html},
  eprint      = {https://ijasr.um.ac.ir/article_36060_5e617c036c44ed0a3bacb13ea2e22b13.pdf},
}