; Reading list on real-time and fault-tolerant systems.
;
; Each entry carries the usual bibliographic fields plus a block of
; annotation fields (studentname, summary, contribution1-5, weakness1-5,
; interesting, opinions).  Standard BibTeX styles ignore those extra fields.
; They are empty placeholders here -- presumably filled in per reader;
; "interesting" presumably takes one of high, med or low (confirm with the
; course tooling).
;
; Conventions used below: authors are separated by " and ", the volume lives
; in its own field rather than inside the journal name, page ranges are
; written in full with a double hyphen, and conference papers use the
; inproceedings entry type with a booktitle field.

;Required Readings

@article{Stankovic88,
  author        = "Stankovic, J. A.",
  title         = "Misconceptions about real-time computing: a serious problem for next-generation systems",
  journal       = "Computer",
  volume        = "21",
  number        = "10",
  pages         = "10--19",
  year          = "1988",
  abstract      = "The author defines real-time computing and states and dispels the most common misconceptions about it. He discusses the fundamental technical issues of real-time computing. He examines specification and verification, scheduling theory, operating systems, programming languages and design methodology, distributed databases, artificial intelligence, fault tolerance, architectures, and communication",
  url           = "http://ieeexplore.ieee.org/iel1/2/371/00007053.pdf",
  studentname   = "",
  summary       = "",
  contribution1 = "",
  contribution2 = "",
  contribution3 = "",
  contribution4 = "",
  contribution5 = "",
  weakness1     = "",
  weakness2     = "",
  weakness3     = "",
  weakness4     = "",
  weakness5     = "",
  interesting   = "high/med/low",
  opinions      = "",
}

; NOTE(review): the next entry has no booktitle and no year, both of which
; the inproceedings type requires.  The CiteSeer URL slug (ghosh94...)
; suggests 1994 -- confirm against the paper and fill both fields in.
@inproceedings{ghoshfaulttolerant,
  author        = "Ghosh, Sunondo and Melhem, Rami and Mosse, Daniel",
  title         = "Fault-Tolerant Scheduling on a Hard Real-Time Multiprocessor System",
  pages         = "775--783",
  url           = "http://citeseer.nj.nec.com/ghosh94faulttolerant.html",
  studentname   = "",
  summary       = "",
  contribution1 = "",
  contribution2 = "",
  contribution3 = "",
  contribution4 = "",
  contribution5 = "",
  weakness1     = "",
  weakness2     = "",
  weakness3     = "",
  weakness4     = "",
  weakness5     = "",
  interesting   = "high/med/low",
  opinions      = "",
}

@inproceedings{Kaiser98,
  author        = "Kaiser, J. and Livani, M. A.",
  title         = "Invocation of real-time objects in a {CAN} bus-system",
  booktitle     = "Proceedings First International Symposium on Object-Oriented Real-Time Distributed Computing ({ISORC} '98) (Cat. No.98EX146)",
  pages         = "298--307",
  year          = "1998",
  abstract      = "The paper focuses on method invocation of real-time objects in a CAN-based distributed real-time system. A simple object model is introduced, which allows the convenient modelling of hardware and software components. Related to the object model, two issues are discussed. Firstly, a model is introduced which allows to form and address object groups. This reflects a basic need in a real-time system to distribute information to multiple clients efficiently. Secondly, the paper discusses an approach to express timing requirements for object invocations. To achieve distributed consensus on communication resource access, an EDF-like approach is introduced, which takes advantage of knowledge about deadlines, the number of remaining communication activities and the remaining worst-case execution time for the invoked method at each point of time",
  url           = "http://ieeexplore.ieee.org/iel4/5419/14648/00666801.pdf",
  studentname   = "",
  summary       = "",
  contribution1 = "",
  contribution2 = "",
  contribution3 = "",
  contribution4 = "",
  contribution5 = "",
  weakness1     = "",
  weakness2     = "",
  weakness3     = "",
  weakness4     = "",
  weakness5     = "",
  interesting   = "high/med/low",
  opinions      = "",
}

; Supplemental Readings

@inproceedings{Ghosh95,
  author        = "Ghosh, S. and Melhem, R. and Mosse, D.",
  title         = "Enhancing real-time schedules to tolerate transient faults",
  booktitle     = "Proceedings. 16th IEEE Real-Time Systems Symposium",
  pages         = "120--129",
  year          = "1995",
  abstract      = "We present a scheme to guarantee that the execution of real-time tasks can tolerate transient and intermittent faults assuming any queue-based scheduling technique. The scheme is based on reserving sufficient slack in a schedule such that a task can be re-executed before its deadline without compromising guarantees given to other tasks. Only enough slack is reserved in the schedule to guarantee fault tolerance if at most one fault occurs within a time interval. This results in increased schedulability and a very low percentage of deadline misses even if no restriction is placed on the fault separation. We provide two algorithms to solve the problem of adding fault tolerance to a queue of real-time tasks. The first is a dynamic programming optimal solution and the second is a greedy heuristic which closely approximates the optimal",
  url           = "http://ieeexplore.ieee.org/iel3/3569/10678/00495202.pdf",
  studentname   = "",
  summary       = "",
  contribution1 = "",
  contribution2 = "",
  contribution3 = "",
  contribution4 = "",
  contribution5 = "",
  weakness1     = "",
  weakness2     = "",
  weakness3     = "",
  weakness4     = "",
  weakness5     = "",
  interesting   = "high/med/low",
  opinions      = "",
}

@article{Shin91,
  author        = "Shin, K. G.",
  title         = "{HARTS}: a distributed real-time architecture",
  journal       = "Computer",
  volume        = "24",
  number        = "5",
  pages         = "25--35",
  year          = "1991",
  abstract      = "The design, implementation, and evaluation of a distributed real-time architecture called HARTS (hexagonal architecture for real-time systems) are discussed, emphasizing its support of time-constrained, fault-tolerant communications and I/O (input/output) requirements. HARTS consists of shared-memory multiprocessor nodes, interconnected by a wrapped hexagonal mesh. This architecture is intended to meet three main requirements of real-time computing: high performance, high reliability, and extensive I/O. The high-level and low-level architecture is described. The evaluation of HARTS, using modeling and simulation with actual parameters derived from its implementation, is reported. Fault-tolerant routing, clock synchronization and the I/O architecture are examined",
  url           = "http://ieeexplore.ieee.org/iel1/2/2542/00076284.pdf",
  studentname   = "",
  summary       = "",
  contribution1 = "",
  contribution2 = "",
  contribution3 = "",
  contribution4 = "",
  contribution5 = "",
  weakness1     = "",
  weakness2     = "",
  weakness3     = "",
  weakness4     = "",
  weakness5     = "",
  interesting   = "high/med/low",
  opinions      = "",
}

@inproceedings{Kandasamy99,
  author        = "Kandasamy, N. and Hayes, J. P. and Murray, B. T.",
  title         = "Tolerating transient faults in statically scheduled safety-critical embedded systems",
  booktitle     = "Proceedings of the 18th IEEE Symposium on Reliable Distributed Systems",
  pages         = "212--221",
  year          = "1999",
  abstract      = "Static off-line scheduling ensures predictability of worst-case behavior and high resource utilization for safety-critical applications but lacks the flexibility needed to deal with run-time fault-tolerance. We present a temporal redundancy-based recovery technique that tolerates transient task failures in statically scheduled distributed embedded systems where tasks have timing, resource, and precedence constraints. Task failures are handled using precomputed contingency schedules that introduce adaptive fault tolerance into table-driven dispatchers. Failures are masked using the spare capacity on the affected processor and the recovery scheme requires no hardware overhead. Our approach combines the benefits of static scheduling with the run-time flexibility needed for fault tolerance in low-cost embedded systems. We present a method to obtain contingency schedules and prove its correctness. We also evaluate the effectiveness of the proposed method through simulation",
  url           = "http://citeseer.nj.nec.com/518305.html",
  studentname   = "",
  summary       = "",
  contribution1 = "",
  contribution2 = "",
  contribution3 = "",
  contribution4 = "",
  contribution5 = "",
  weakness1     = "",
  weakness2     = "",
  weakness3     = "",
  weakness4     = "",
  weakness5     = "",
  interesting   = "high/med/low",
  opinions      = "",
}

@article{Sha90,
  author        = "Sha, L. and Rajkumar, R. and Lehoczky, J. P.",
  title         = "Priority inheritance protocols: an approach to real-time synchronization",
  journal       = "IEEE Transactions on Computers",
  volume        = "39",
  number        = "9",
  pages         = "1175--1185",
  year          = "1990",
  abstract      = "An investigation is conducted of two protocols belonging to the priority inheritance protocols class; the two are called the basic priority inheritance protocol and the priority ceiling protocol. Both protocols solve the uncontrolled priority inversion problem. The priority ceiling protocol solves this uncontrolled priority inversion problem particularly well; it reduces the worst-case task-blocking time to at most the duration of execution of a single critical section of a lower-priority task. This protocol also prevents the formation of deadlocks. Sufficient conditions under which a set of periodic tasks using this protocol may be scheduled is derived",
  url           = "http://ieeexplore.ieee.org/iel1/12/2066/00057058.pdf",
  studentname   = "",
  summary       = "",
  contribution1 = "",
  contribution2 = "",
  contribution3 = "",
  contribution4 = "",
  contribution5 = "",
  weakness1     = "",
  weakness2     = "",
  weakness3     = "",
  weakness4     = "",
  weakness5     = "",
  interesting   = "high/med/low",
  opinions      = "",
}

@inproceedings{Krishna85,
  author        = "Krishna, C. M. and Shin, K. G.",
  title         = "On scheduling tasks with a quick recovery from failure",
  booktitle     = "Fifteenth Annual International Symposium on Fault-Tolerant Computing FTCS 15. Digest of Papers",
  pages         = "234--239",
  year          = "1985",
  abstract      = "Multiprocessors used in life-critical real-time systems must recover quickly from failure. Part of this recovery consists of switching to a new task schedule that ensures that hard deadlines for critical tasks continue to be met. A dynamic programming algorithm is presented that ensures that backup, or contingency, schedules can be efficiently embedded within the original, `primary' schedule to ensure that hard deadlines continue to be met in the face of up to a given maximum number of processor failures. Several illustrative examples are included",
  url           = "http://www.ece.cmu.edu/~ece749/papers/krishna86_task_recovery_scheduling.pdf",
  studentname   = "",
  summary       = "",
  contribution1 = "",
  contribution2 = "",
  contribution3 = "",
  contribution4 = "",
  contribution5 = "",
  weakness1     = "",
  weakness2     = "",
  weakness3     = "",
  weakness4     = "",
  weakness5     = "",
  interesting   = "high/med/low",
  opinions      = "",
}

@article{Muppala91,
  author        = "Muppala, J. K. and Woolet, S. P. and Trivedi, K. S.",
  title         = "Real-time systems performance in the presence of failures",
  journal       = "Computer",
  volume        = "24",
  number        = "5",
  pages         = "37--47",
  year          = "1991",
  abstract      = "A unified methodology for modeling both soft and hard real-time systems is presented. Techniques that combine the effects of performance, reliability/availability, and deadline violation into a single model are used. An online transaction processing system is used as an example to illustrate the modeling techniques. Dynamic failures due to a transaction violating a hard deadline are taken into account by incorporating additional transitions in the Markov chain model of the failure-repair behavior. System performance in the various configurations is considered by using throughput and response-time distribution as reward rates. Since the Markov chains used in computing the distribution of response time are often very large and complex, a higher level interface based on a variation of stochastic Petri nets called stochastic reward nets is used",
  url           = "http://ieeexplore.ieee.org/iel1/2/2542/00076285.pdf",
  studentname   = "",
  summary       = "",
  contribution1 = "",
  contribution2 = "",
  contribution3 = "",
  contribution4 = "",
  contribution5 = "",
  weakness1     = "",
  weakness2     = "",
  weakness3     = "",
  weakness4     = "",
  weakness5     = "",
  interesting   = "high/med/low",
  opinions      = "",
}

@article{Minsoo98,
  author        = "Ryu, Minsoo and Hong, Seongsoo",
  title         = "End-to-end design of distributed real-time systems",
  journal       = "Control Engineering Practice",
  volume        = "6",
  number        = "1",
  pages         = "93--102",
  year          = "1998",
  abstract      = "This paper presents a systematic approach to the design of distributed real-time systems with system-level timing requirements. It is often extremely difficult to design such a system in a composible fashion, since temporal relationships induced by system-level timing requirements introduce complicated couplings between structurally irrelevant components. As a solution to this problem, the approach described maps system-level timing requirements onto component-level timing constraints. More specifically, it first transforms system-level requirements into a set of nonlinear intermediate constraints; and then derives task attributes such as periods, phases, and deadlines, with the objective of maximizing the chances of the system being schedulable. The final results preserve the desired timing correctness: if the final task set is schedulable, then the original system-level requirements will be satisfied. The approach is demonstrated and experimentally validated via an example of a numerical control system built on the FIP network",
  url           = "http://citeseer.nj.nec.com/63209.html",
  studentname   = "",
  summary       = "",
  contribution1 = "",
  contribution2 = "",
  contribution3 = "",
  contribution4 = "",
  contribution5 = "",
  weakness1     = "",
  weakness2     = "",
  weakness3     = "",
  weakness4     = "",
  weakness5     = "",
  interesting   = "high/med/low",
  opinions      = "",
}

@article{Shin94,
  author        = "Shin, K. G. and Ramanathan, P.",
  title         = "Real-time computing: a new discipline of computer science and engineering",
  journal       = "Proceedings of the IEEE",
  volume        = "82",
  number        = "1",
  pages         = "6--24",
  year          = "1994",
  abstract      = "This paper surveys the state of the art in real-time computing. It introduces basic concepts and identifies key issues in the design of real-time systems. Solutions proposed in literature for tackling these issues are also briefly discussed",
  url           = "http://ieeexplore.ieee.org/iel1/5/6554/00259423.pdf",
  studentname   = "",
  summary       = "",
  contribution1 = "",
  contribution2 = "",
  contribution3 = "",
  contribution4 = "",
  contribution5 = "",
  weakness1     = "",
  weakness2     = "",
  weakness3     = "",
  weakness4     = "",
  weakness5     = "",
  interesting   = "high/med/low",
  opinions      = "",
}

@article{Stankovic91,
  author        = "Stankovic, J. A. and Ramamritham, K.",
  title         = "The {Spring} kernel: a new paradigm for real-time systems",
  journal       = "IEEE Software",
  volume        = "8",
  number        = "3",
  pages         = "62--72",
  year          = "1991",
  abstract      = "A real-time operating system kernel, called the Spring kernel, that provides some of the basic support required for large, complex, next-generation real-time systems, especially in meeting timing constraints, is presented. The approach meets the need to build predictable yet flexible real-time systems. Most current real-time operating systems contain the same basic paradigms found in time-sharing operating systems and often use a basic priority-scheduling mechanism that provides no direct support for meeting timing constraints. Spring uses two criteria to classify tasks' interaction with and effects on the environment: importance and timing requirements. Implementation experience with Spring is described",
  url           = "http://ieeexplore.ieee.org/iel1/52/2900/00088945.pdf",
  studentname   = "",
  summary       = "",
  contribution1 = "",
  contribution2 = "",
  contribution3 = "",
  contribution4 = "",
  contribution5 = "",
  weakness1     = "",
  weakness2     = "",
  weakness3     = "",
  weakness4     = "",
  weakness5     = "",
  interesting   = "high/med/low",
  opinions      = "",
}

@article{Sha94,
  author        = "Sha, Lui and Rajkumar, R. and Sathaye, S. S.",
  title         = "Generalized rate-monotonic scheduling theory: a framework for developing real-time systems",
  journal       = "Proceedings of the IEEE",
  volume        = "82",
  number        = "1",
  pages         = "68--82",
  year          = "1994",
  abstract      = "Real-time computing systems are used to control telecommunication systems, defense systems, avionics, and modern factories. Generalized rate-monotonic scheduling theory, is a recent development that has had large impact on the development of real-time systems and open standards. In this paper we provide an up-to-date and self-contained review of generalized rate-monotonic scheduling theory. We show how this theory can be applied in practical system development, where special attention must be given to facilitate concurrent development by geographically distributed programming teams and the reuse of existing hardware and software components",
  url           = "http://ieeexplore.ieee.org/iel1/5/6554/00259427.pdf",
  studentname   = "",
  summary       = "",
  contribution1 = "",
  contribution2 = "",
  contribution3 = "",
  contribution4 = "",
  contribution5 = "",
  weakness1     = "",
  weakness2     = "",
  weakness3     = "",
  weakness4     = "",
  weakness5     = "",
  interesting   = "high/med/low",
  opinions      = "",
}