author | Sylvain Thénault <sylvain.thenault@logilab.fr> |
Fri, 07 Aug 2009 12:20:37 +0200 | |
branch | stable |
changeset 2727 | 5275d015834c |
parent 2312 | af4d8f75c5db |
child 2467 | 6983631f5d0d |
permissions | -rw-r--r-- |
0 | 1 |
"""rest publishing functions |
2 |
||
996 | 3 |
contains some functions and setup of docutils for cubicweb. Provides the |
4 |
following ReST directives: |
|
5 |
||
6 |
* `eid`, create link to entity in the repository by their eid |
|
7 |
||
8 |
* `card`, create link to card entity in the repository by their wikiid |
|
9 |
(proposing to create it when the referred card doesn't exist yet) |
|
10 |
||
11 |
* `winclude`, reference to a web documentation file (in wdoc/ directories) |
|
12 |
||
13 |
* `sourcecode` (if pygments is installed), source code colorization |
|
0 | 14 |
|
15 |
:organization: Logilab |
|
1977
606923dff11b
big bunch of copyright / docstring update
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
1643
diff
changeset
|
16 |
:copyright: 2001-2009 LOGILAB S.A. (Paris, FRANCE), license is LGPL v2. |
0 | 17 |
:contact: http://www.logilab.fr/ -- mailto:contact@logilab.fr |
1977
606923dff11b
big bunch of copyright / docstring update
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
1643
diff
changeset
|
18 |
:license: GNU Lesser General Public License, v2.1 - http://www.gnu.org/licenses |
0 | 19 |
""" |
20 |
__docformat__ = "restructuredtext en" |
|
21 |
||
22 |
from cStringIO import StringIO |
|
23 |
from itertools import chain |
|
24 |
from logging import getLogger |
|
25 |
from os.path import join |
|
26 |
||
27 |
from docutils import statemachine, nodes, utils, io |
|
28 |
from docutils.core import publish_string |
|
29 |
from docutils.parsers.rst import Parser, states, directives |
|
30 |
from docutils.parsers.rst.roles import register_canonical_role, set_classes |
|
31 |
||
2312
af4d8f75c5db
use xml_escape
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
2311
diff
changeset
|
32 |
from logilab.mtconverter import ESC_UCAR_TABLE, ESC_CAR_TABLE, xml_escape |
0 | 33 |
|
704
0c2c8f0a6ded
new ext package for modules depending on an option third party package
sylvain.thenault@logilab.fr
parents:
0
diff
changeset
|
34 |
from cubicweb.ext.html4zope import Writer |
0 | 35 |
|
36 |
# We provide our own parser as an attempt to get rid of
# state machine reinstantiation

import re
# Compile once, at import time, the `states.Body` patterns which are still
# plain strings. The compiled objects are stored back into the `patterns`
# dictionary of the docutils `Body` class itself.
for k, v in states.Body.patterns.items():
    if isinstance(v, str):
        states.Body.patterns[k] = re.compile(v)

# register ReStructured Text mimetype / extensions: both the `.rest` and
# `.rst` file extensions are mapped to the `text/rest` mime type
import mimetypes
mimetypes.add_type('text/rest', '.rest')
mimetypes.add_type('text/rest', '.rst')


# logger used to report ReST publishing problems
LOGGER = getLogger('cubicweb.rest')
|
52 |
||
53 |
def eid_reference_role(role, rawtext, text, lineno, inliner,
                       options={}, content=[]):
    """Interpreted-text role creating a link to an entity given its eid.

    The role content is either `<eid>` or `<eid>:<label>`. When no label is
    given, `#<eid>` is used as link text.

    :param role: name of the role as used in the document (unused)
    :param rawtext: the whole role markup, used as raw source of the nodes
    :param text: the role content
    :param lineno: line number attached to the error message, if any
    :param inliner:
      the inliner calling the role, giving access to the reporter and to the
      document settings (hence to the cubicweb context)
    :param options:
      passed through `set_classes`, then given as keyword arguments to the
      reference node
    :param content: unused

    :return:
      a `(nodes, system messages)` pair: a single reference node targeting
      the entity's absolute url and no message, or a single "problematic"
      node and its error message when the eid part isn't an integer or is
      negative
    """
    try:
        # test for the separator explicitly rather than relying on a bare
        # `except:` around the tuple unpacking, which would also swallow
        # unrelated errors such as KeyboardInterrupt
        if u':' in text:
            eid_text, label = text.split(u':', 1)
        else:
            eid_text, label = text, '#' + text
        eid_num = int(eid_text)
        if eid_num < 0:
            raise ValueError
    except ValueError:
        msg = inliner.reporter.error(
            'EID number must be a positive number; "%s" is invalid.'
            % text, line=lineno)
        prb = inliner.problematic(rawtext, rawtext, msg)
        return [prb], [msg]
    # the cubicweb context is carried by the document settings
    context = inliner.document.settings.context
    refedentity = context.req.eid_rset(eid_num).get_entity(0, 0)
    ref = refedentity.absolute_url()
    set_classes(options)
    return [nodes.reference(rawtext, utils.unescape(label), refuri=ref,
                            **options)], []
|
76 |
||
77 |
# make the role above available in ReST documents under the `eid` name
register_canonical_role('eid', eid_reference_role)
|
78 |
||
79 |
||
80 |
def winclude_directive(name, arguments, options, content, lineno,
                       content_offset, block_text, state, state_machine):
    """Include a reST file as part of the content of this reST file.

    Same as the standard `include` directive, but the file to include is
    located with `config.locate_doc_file`: `_<lang>.rst` is appended to the
    directive argument, trying in turn the request language, the value of
    the `ui.language` property and each language returned by
    `config.available_languages()`. The first match wins.

    Most of this implementation is copied from the `include` directive
    defined in `docutils.parsers.rst.directives.misc`.

    Supported options:

    * `literal`, insert the file content as a literal block instead of
      parsing it
    * `encoding`, encoding of the included file (defaults to the
      `input_encoding` document setting)

    :return:
      a list of nodes: a single severe system message if the file can't be
      located, opened or decoded; a single literal block when the `literal`
      option is set; else an empty list, the lines of the file being
      inserted into the state machine input
    """
    # `sys.exc_info()` is used to get hold of caught exceptions so that no
    # version-specific `except` binding syntax is needed
    import sys

    def severe(message):
        """return a one-element node list reporting `message` as severe"""
        return [state_machine.reporter.severe(
            message, nodes.literal_block(block_text, block_text),
            line=lineno)]

    settings = state.document.settings
    context = settings.context
    fid = arguments[0]
    languages = chain((context.req.lang,
                       context.vreg.property_value('ui.language')),
                      context.config.available_languages())
    for lang in languages:
        rid = '%s_%s.rst' % (fid, lang)
        resourcedir = context.config.locate_doc_file(rid)
        if resourcedir:
            break
    else:
        # no language variant of the file could be located
        return severe(
            'Problems with "%s" directive path:\nno resource matching %s.'
            % (name, fid))
    path = join(resourcedir, rid)
    encoding = options.get('encoding', settings.input_encoding)
    try:
        settings.record_dependencies.add(path)
        include_file = io.FileInput(
            source_path=path, encoding=encoding,
            error_handler=settings.input_encoding_error_handler,
            handle_io_errors=None)
    except IOError:
        error = sys.exc_info()[1]
        return severe(
            'Problems with "%s" directive path:\n%s: %s.'
            % (name, error.__class__.__name__, error))
    try:
        include_text = include_file.read()
    except UnicodeError:
        error = sys.exc_info()[1]
        return severe(
            'Problem with "%s" directive:\n%s: %s'
            % (name, error.__class__.__name__, error))
    if 'literal' in options:
        literal_block = nodes.literal_block(include_text, include_text,
                                            source=path)
        literal_block.line = 1
        # always return a *list* of nodes, as the other branches do
        return [literal_block]
    include_lines = statemachine.string2lines(include_text,
                                              convert_whitespace=1)
    state_machine.insert_input(include_lines, path)
    return []
|
139 |
||
140 |
# docutils function-directive convention: (number of required arguments,
# number of optional arguments, whether the final argument may contain
# whitespace), i.e. exactly one argument here, the file identifier
winclude_directive.arguments = (1, 0, 1)
# supported options and their validators
winclude_directive.options = {'literal': directives.flag,
                              'encoding': directives.encoding}
# make the directive available in ReST documents under the `winclude` name
directives.register_directive('winclude', winclude_directive)
|
144 |
||
996 | 145 |
try:
    from pygments import highlight
    from pygments.lexers import get_lexer_by_name, LEXERS
    from pygments.formatters import HtmlFormatter
except ImportError:
    # pygments is optional: without it the `sourcecode` directive is simply
    # not registered
    pass
else:
    # a single formatter instance is used for every colorized block
    _PYGMENTS_FORMATTER = HtmlFormatter()

    def pygments_directive(name, arguments, options, content, lineno,
                           content_offset, block_text, state, state_machine):
        """Colorize the directive content using pygments.

        The directive argument is the name of the pygments lexer to use. If
        no lexer is found for that name, the problem is logged and the
        `text` lexer is used instead.

        As a side effect, the `pygments.css` stylesheet is added to the
        request found in the document settings' context.

        :return: a list containing a single raw HTML node
        """
        try:
            lexer = get_lexer_by_name(arguments[0])
        except ValueError:
            # no lexer found: report it through the module logger instead
            # of printing to the process standard streams
            LOGGER.warning('no pygments lexer found for %r, using the '
                           '"text" lexer instead', arguments[0])
            LOGGER.debug('known pygments lexer aliases: %s',
                         sorted(lexer_info[2]
                                for lexer_info in LEXERS.values()))
            lexer = get_lexer_by_name('text')
        parsed = highlight(u'\n'.join(content), lexer, _PYGMENTS_FORMATTER)
        context = state.document.settings.context
        context.req.add_css('pygments.css')
        return [nodes.raw('', parsed, format='html')]

    # exactly one argument (the lexer name), which may contain whitespace
    pygments_directive.arguments = (1, 0, 1)
    # the directive takes content: the source code to colorize
    pygments_directive.content = 1
    directives.register_directive('sourcecode', pygments_directive)
|
172 |
||
173 |
||
0 | 174 |
class CubicWebReSTParser(Parser):
    """reStructuredText parser building its state machine only once.

    The state machine is created when the parser is instantiated and then
    used by every call to `parse`, instead of being created on each call.
    """

    def __init__(self):
        # NB: the `Parser` constructor is not called, every attribute is set
        # up here
        self.inliner = states.Inliner()
        self.state_classes = states.state_classes
        self.initial_state = 'Body'
        self.statemachine = states.RSTStateMachine(
            debug=0,
            initial_state=self.initial_state,
            state_classes=self.state_classes)

    def parse(self, inputstring, document):
        """Run the state machine over `inputstring` to fill the `document`
        tree.
        """
        self.setup_parse(inputstring, document)
        lines = statemachine.string2lines(inputstring, convert_whitespace=1)
        self.statemachine.run(lines, document, inliner=self.inliner)
        self.finish_parse()
|
193 |
||
194 |
||
195 |
def rest_publish(context, data):
    """Render a string formatted as ReStructured Text to HTML.

    :type context: a cubicweb application object
    :param context:
      object giving access to the request through its `req` attribute; it
      is also made available to roles and directives through the `context`
      document setting

    :type data: str or unicode
    :param data:
      some ReST text; a byte string is expected to be encoded using the
      request's encoding

    :rtype: unicode
    :return:
      the data formatted as HTML or, if an error occurred, the xml-escaped
      concatenation of a translated error message and of the data
    """
    req = context.req
    if isinstance(data, unicode):
        encoding, unprintable = 'unicode', ESC_UCAR_TABLE
    else:
        encoding, unprintable = req.encoding, ESC_CAR_TABLE
    # remove unprintable characters unauthorized in xml
    data = data.translate(unprintable)
    # pick the base url given to the HTML writer
    if not context:
        base_url = None
    elif hasattr(req, 'url'):
        base_url = req.url()
    elif hasattr(context, 'absolute_url'):
        base_url = context.absolute_url()
    else:
        base_url = req.base_url()
    settings = {
        'input_encoding': encoding,
        'output_encoding': 'unicode',
        'warning_stream': StringIO(),
        'context': context,
        # dunno what's the max, severe is 4, and we never want a crash
        # (though try/except may be a better option...)
        'halt_level': 10,
    }
    try:
        # a new parser is instantiated for each publication
        writer = Writer(base_url=base_url)
        parser = CubicWebReSTParser()
        return publish_string(source=data, writer=writer, parser=parser,
                              settings_overrides=settings)
    except Exception:
        LOGGER.exception('error while publishing ReST text')
        if not isinstance(data, unicode):
            data = unicode(data, encoding, 'replace')
        message = req._('error while publishing ReST text')
        return xml_escape(message + '\n\n' + data)